1#!/usr/bin/env python3
2
3# Copyright 2020 David Robillard <d@drobilla.net>
4#
5# Permission to use, copy, modify, and/or distribute this software for any
6# purpose with or without fee is hereby granted, provided that the above
7# copyright notice and this permission notice appear in all copies.
8#
9# THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17"""
18Write Sphinx markup from Doxygen XML.
19
Takes the path to the index.xml in a directory of XML generated by Doxygen,
and emits a directory with a reStructuredText file for every documented symbol.
22"""
23
24import argparse
25import os
26import sys
27import textwrap
28import xml.etree.ElementTree
29
# Module metadata.  The version is the release date with the dashes replaced
# by dots.
__author__ = "David Robillard"
__date__ = "2020-11-18"
__email__ = "d@drobilla.net"
__license__ = "ISC"
__version__ = __date__.replace("-", ".")
35
36
def load_index(index_path):
    """
    Build the skeleton index from the Doxygen index document.

    :param index_path: Path to the index XML file to parse.

    :returns: A dictionary that maps reference IDs to records.  Every record
    has a ``kind`` and a ``name``.  Compound records also have an
    ``xml_filename`` (the compound ID with ``.xml`` appended) and an empty
    ``children`` list, and enum records have an empty ``children`` list.
    """

    skipped_kinds = ("dir", "file", "page")
    records = {}

    for entry in xml.etree.ElementTree.parse(index_path).getroot():
        entry_id = entry.get("refid")
        entry_kind = entry.get("kind")
        entry_name = entry.find("name").text
        if entry_kind in skipped_kinds:
            continue

        # Each compound is listed exactly once in the index
        assert entry_id not in records
        records[entry_id] = {
            "kind": entry_kind,
            "name": entry_name,
            "xml_filename": entry_id + ".xml",
            "children": [],
        }

        # Only members of namespaces get a qualified name
        if entry_kind == "namespace":
            prefix = "%s::" % entry_name
        else:
            prefix = ""

        for member in entry.findall("member"):
            member_id = member.get("refid")
            if member_id in records:
                # Only groups may list a member that was already recorded
                assert entry_kind == "group"
                continue

            member_kind = member.get("kind")
            bare_name = member.find("name").text
            member_record = {"kind": member_kind, "name": prefix + bare_name}

            if member_kind == "enum":
                # Enums are not compounds, but their values are resolved as
                # children so they are not written as top level documents
                member_record["children"] = []
            elif member_kind == "enumvalue":
                # Enum values are stored without the namespace prefix
                member_record["name"] = bare_name

            records[member_id] = member_record

    return records
93
94
def resolve_index(index, root):
    """
    Extend the index with structure found in one definition document.

    Fills in the "children" lists, sets "parent" on enum values and on
    members of classes, structs, and unions, records the "strong" flag of
    enums, and removes members of private sections from the index.

    :param index: The index dictionary, which is modified in place.
    :param root: A parsed definition document containing a ``compounddef``.
    """

    record_kinds = ("class", "struct", "union")

    def link(parent_id, child_id):
        """Register a child with its parent, setting "parent" if needed."""
        parent_record = index[parent_id]
        child_record = index[child_id]

        is_enum_value = child_record["kind"] == "enumvalue"
        if is_enum_value:
            assert parent_record["kind"] == "enum"

        if is_enum_value or parent_record["kind"] in record_kinds:
            known_parent = child_record.get("parent", parent_id)
            assert "parent" not in child_record or known_parent == parent_id
            child_record["parent"] = parent_id

        siblings = parent_record["children"]
        if child_id not in siblings:
            siblings.append(child_id)

    compound = root.find("compounddef")
    compound_id = compound.get("id")

    if compound.get("kind") == "group":
        # Subgroups come first, then classes, as in the original document
        for tag in ("innergroup", "innerclass"):
            for inner in compound.findall(tag):
                link(compound_id, inner.get("refid"))

    for section in compound.findall("sectiondef"):
        members = section.findall("memberdef")

        if section.get("kind").startswith("private"):
            # Private members are not documented, so forget about them
            for member in members:
                index.pop(member.get("id"), None)
            continue

        for member in members:
            member_id = member.get("id")
            link(compound_id, member_id)

            if member.get("kind") != "enum":
                continue

            index[member_id]["strong"] = member.get("strong") == "yes"
            for value in member.findall("enumvalue"):
                link(member_id, value.get("id"))
145
146
def sphinx_role(record, lang):
    """
    Return the Sphinx role used for a record.

    The role is used both for description directives like ".. c:function::"
    and as the basis of links like ":c:func:`foo`".

    :param record: An index record with at least a "kind" key.
    :param lang: The language domain prefix, like "c" or "cpp".
    :raises RuntimeError: if there is no role for the record's kind.
    """

    kind = record["kind"]

    # Macros always use the C domain, regardless of the language
    if kind == "define":
        return "c:macro"

    if kind == "enum":
        suffix = "enum-class" if record["strong"] else "enum"
    elif kind == "variable":
        suffix = "member" if "parent" in record else "var"
    else:
        suffix = {
            "class": "class",
            "function": "function",
            "namespace": "namespace",
            "struct": "struct",
            "union": "union",
            "typedef": "type",
            "enumvalue": "enumerator",
        }.get(kind)

        if suffix is None:
            raise RuntimeError("No known role for kind '%s'" % kind)

    return lang + ":" + suffix
176
177
def child_identifier(lang, parent_name, child_name):
    """
    Return the qualified identifier of an enum value or struct member.

    The parent and child names are joined with "::" for the "cpp" language,
    and with "." for anything else.
    """

    if lang == "cpp":
        return "%s::%s" % (parent_name, child_name)

    return "%s.%s" % (parent_name, child_name)
188
189
def link_markup(index, lang, refid):
    """
    Return a Sphinx link for a Doxygen reference.

    :param index: The index dictionary that contains ``refid``.
    :param lang: The language domain prefix, like "c" or "cpp".
    :param refid: The Doxygen ID of the link target.
    :raises RuntimeError: if the target kind can not be linked to.
    """

    target = index[refid]
    kind = target["kind"]
    name = target["name"]
    role = sphinx_role(target, lang)

    if kind == "function":
        return ":%s:func:`%s`" % (lang, name)

    if kind in ("class", "enum", "struct", "typedef", "union"):
        return ":%s:`%s`" % (role, name)

    # Variables without a parent are linked to by their own name
    if kind == "variable" and "parent" not in target:
        return ":%s:var:`%s`" % (lang, name)

    # Enum values and member variables are qualified by their parent
    if kind in ("enumvalue", "variable"):
        scope = index[target["parent"]]["name"]
        return ":%s:`%s`" % (role, child_identifier(lang, scope, name))

    raise RuntimeError("Unknown link target kind: %s" % kind)
215
216
def indent(markup, depth):
    """
    Indent markup by a number of reST indentation levels.

    Like textwrap.indent(), but takes an integer depth, where each level is
    three spaces.
    """

    prefix = depth * "   "

    return textwrap.indent(markup, prefix)
226
227
def heading(text, level):
    """
    Return a ReST heading at a given level.

    Follows the style in the Python documentation guide, see
    <https://devguide.python.org/documenting/#sections>.  Levels 1 and 2 are
    written with both an overline and an underline, deeper levels with only
    an underline (preceded by an empty line).

    :param text: The heading text.
    :param level: The heading level, from 1 (outermost) to 6 (innermost).
    :returns: The heading markup, terminated by a blank line.
    """

    assert 1 <= level <= 6

    chars = ("#", "*", "=", "-", "^", '"')

    # Levels are 1-based but the tuple is 0-based, so level 1 is "#" and
    # level 6 is the last character rather than an out-of-range index
    line = chars[level - 1] * len(text)

    return "%s\n%s\n%s\n\n" % (line if level < 3 else "", text, line)
242
243
def dox_to_rst(index, lang, node):
    """
    Translate a documentation command node (docCmdGroup) to Sphinx markup.

    This converts the content of descriptions, recursing into child nodes
    where a tag can contain other commands.

    :param index: The index dictionary, used to resolve references.
    :param lang: The language domain prefix, like "c" or "cpp".
    :param node: The XML element to convert.
    :raises RuntimeError: if an unknown tag or simplesect kind is found.
    """

    def convert(child):
        """Convert a child node with the same index and language."""
        return dox_to_rst(index, lang, child)

    def field_value(markup):
        """Return field content on the same line, or as an indented block."""
        stripped = markup.strip()
        if "\n" not in stripped:
            return " " + stripped

        return "\n" + indent(markup, 1)

    tag = node.tag

    if tag == "para":
        text = node.text or ""
        for child in node:
            text += convert(child)
            text += child.tail or ""

        return text.strip() + "\n\n"

    if tag == "ref":
        refid = node.get("refid")
        if refid not in index:
            sys.stderr.write("warning: Unresolved link: %s\n" % refid)
            return node.text

        assert len(node) == 0
        link = link_markup(index, lang, refid)
        assert len(link) > 0
        return link

    if tag == "computeroutput":
        assert len(node) == 0
        return "``%s``" % node.text

    if tag == "ulink":
        return "`%s <%s>`_" % (node.text, node.get("url"))

    if tag == "itemizedlist":
        bullets = []
        for item in node.findall("listitem"):
            assert len(item) == 1
            bullets.append("\n- %s" % convert(item[0]))

        return "".join(bullets)

    if tag == "parameterlist":
        fields = []
        for item in node.findall("parameteritem"):
            name = item.find("parameternamelist/parametername")
            description = item.find("parameterdescription")
            assert len(description) == 1
            name_text = name.text
            value = field_value(convert(description[0]))
            fields.append("\n\n:param %s:%s" % (name_text, value))

        return "".join(fields) + "\n"

    if tag == "programlisting":
        listing = indent(plain_text(node), 1)
        return "\n.. code-block:: %s\n\n%s" % (lang, listing)

    if tag == "simplesect":
        assert len(node) == 1
        sect_kind = node.get("kind")

        if sect_kind == "return":
            return "\n:returns:" + field_value(convert(node[0]))

        if sect_kind == "see":
            return convert(node[0])

        raise RuntimeError("Unknown simplesect kind: %s" % sect_kind)

    raise RuntimeError("Unknown documentation command: %s" % tag)
326
327
def description_markup(index, lang, node):
    """
    Return the markup for a brief or detailed description.

    :param index: The index dictionary, used to resolve references.
    :param lang: The language domain prefix, like "c" or "cpp".
    :param node: A ``briefdescription`` or ``detaileddescription`` element.
    :returns: The concatenated markup of all children, which is empty for a
    description with no content.
    """

    assert node.tag in ("briefdescription", "detaileddescription")
    assert not (node.tag == "briefdescription" and len(node) > 1)

    # A description has no text of its own, only child elements.  The text of
    # an element with no leading text at all is None rather than "", so
    # treat that the same as whitespace instead of failing on it.
    assert not (node.text or "").strip()

    return "".join(dox_to_rst(index, lang, child) for child in node)
336
337
def set_descriptions(index, lang, definition, record):
    """
    Copy the brief and detailed descriptions of a definition to a record.

    A description that is not present in the definition leaves the
    corresponding key of the record untouched.
    """

    brief = definition.find("briefdescription")
    if brief is not None:
        record["briefdescription"] = description_markup(index, lang, brief)

    detailed = definition.find("detaileddescription")
    if detailed is not None:
        record["detaileddescription"] = description_markup(
            index, lang, detailed
        )
345
346
def set_template_params(node, record):
    """
    Set a record's template_params from the XML definition.

    Nothing is set if the definition has no ``templateparamlist``.  Otherwise
    the parameters are stored as a single comma-separated string.
    """

    param_list = node.find("templateparamlist")
    if param_list is None:
        return

    def param_text(param):
        """Return the text of one template parameter."""
        type_text = plain_text(param.find("type"))
        declname = param.find("declname")
        if declname is None:
            # Type parameter, the type text is the whole declaration
            return type_text

        # Value parameter, with a type and a name
        return "%s %s" % (type_text, plain_text(declname))

    record["template_params"] = ", ".join(
        param_text(param) for param in param_list.findall("param")
    )
365
366
def plain_text(node):
    """
    Return the text content of a node with all markup tags dropped.

    This is needed where Doxygen may include refs but Sphinx needs plain text
    because it parses things itself to generate links.  An ``sp`` element
    contributes a single space in place of its own text.
    """

    pieces = [" " if node.tag == "sp" else (node.text or "")]

    for child in node:
        pieces.append(plain_text(child))
        pieces.append(child.tail or "")

    return "".join(pieces)
387
388
def local_name(name):
    """Return the part of a name that follows the last "::", if any."""

    _, _, unqualified = name.rpartition("::")

    return unqualified
393
394
def read_definition_doc(index, lang, root):
    """
    Read documentation from a definition document into the index.

    Sets the descriptions, template parameters, and title of the compound
    itself, then the descriptions and declaration details ("type",
    "prototype", "definition") of every member outside private sections.

    :param index: The index dictionary, whose records are updated in place.
    :param lang: The language domain prefix, like "c" or "cpp".
    :param root: A parsed definition document containing a ``compounddef``.
    """

    compound = root.find("compounddef")
    compound_record = index[compound.get("id")]
    set_descriptions(index, lang, compound, compound_record)
    set_template_params(compound, compound_record)

    title = compound.find("title")
    if title is not None:
        compound_record["title"] = title.text.strip()

    in_record_type = compound.get("kind") in ("class", "struct", "union")

    for section in compound.findall("sectiondef"):
        if section.get("kind").startswith("private"):
            continue

        for member in section.findall("memberdef"):
            kind = member.get("kind")
            record = index[member.get("id")]
            set_descriptions(index, lang, member, record)
            set_template_params(member, record)

            if in_record_type:
                assert kind in ["function", "typedef", "variable"]
                record["type"] = plain_text(member.find("type"))

            if kind == "enum":
                # Enum values are described inside the enum definition
                for value in member.findall("enumvalue"):
                    value_record = index[value.get("id")]
                    set_descriptions(index, lang, value, value_record)

                continue

            if kind == "function":
                return_type = plain_text(member.find("type"))
                function_name = member.find("name").text
                arguments = member.find("argsstring").text
                record["prototype"] = "%s %s%s" % (
                    return_type,
                    function_name,
                    arguments,
                )
                continue

            if kind != "typedef":
                continue

            alias = local_name(record["name"])
            args_text = member.find("argsstring").text
            target_text = plain_text(member.find("type"))

            if args_text is None:
                # Normal named typedef
                assert target_text is not None
                record["type"] = target_text
                if member.find("definition").text.startswith("using"):
                    definition = "%s = %s" % (alias, target_text)
                else:
                    definition = "%s %s" % (target_text, alias)

                record["definition"] = definition
            else:
                # Function pointer, where the name is between type and args
                assert target_text[-2:] == "(*" and args_text[0] == ")"
                record["type"] = target_text + args_text
                record["definition"] = target_text + alias + args_text
456
457
def declaration_string(record):
    """
    Return the declaration that follows the directive for a record.

    This is in C/C++ syntax, except without keywords like "typedef" and
    "using" as expected by Sphinx.  For example, "struct ThingImpl Thing" or
    "void run(int value)".
    """

    kind = record["kind"]

    if "template_params" in record:
        prefix = "template <%s> " % record["template_params"]
    else:
        prefix = ""

    if kind == "function":
        declaration = record["prototype"]
    elif kind == "typedef":
        declaration = record["definition"]
    else:
        declaration = local_name(record["name"])
        if "type" in record:
            declaration = "%s %s" % (record["type"], declaration)

    return prefix + declaration
483
484
def document_markup(index, lang, record):
    """
    Return the complete document that describes some documented entity.

    The document has a namespace directive if the name is qualified, the
    directive and descriptions of the entity itself, and then the directive
    and descriptions of each of its children.
    """

    kind = record["kind"]
    role = sphinx_role(record, lang)
    name = record["name"]

    parts = []

    # Set the namespace scope for qualified names
    scope, separator, _ = name.rpartition("::")
    if separator:
        parts.append(".. cpp:namespace:: %s\n\n" % scope)

    # Top-level directive, followed by the main description blurb
    parts.append(".. %s:: %s\n" % (role, declaration_string(record)))
    parts.append("\n")
    parts.append(indent(record["briefdescription"], 1))
    parts.append(indent(record["detaileddescription"], 1))

    assert (
        kind in ["class", "enum", "namespace", "struct", "union"]
        or "children" not in record
    )

    # Sphinx C++ namespaces work by setting a scope, they have no content
    outer = 0 if kind == "namespace" else 1
    inner = outer + 1

    # Inline children, if there are any
    parts.append("\n")
    for child_id in record.get("children", []):
        child = index[child_id]
        directive = ".. %s:: %s\n\n" % (
            sphinx_role(child, lang),
            declaration_string(child),
        )

        parts += [
            "\n",
            indent(directive, outer),
            indent(child["briefdescription"], inner),
            indent(child["detaileddescription"], inner),
            "\n",
        ]

    return "".join(parts)
530
531
def symbol_filename(name):
    """Return a symbol name with every "::" replaced by "__"."""

    return "__".join(name.split("::"))
536
537
def emit_symbols(index, lang, symbol_dir, force):
    """
    Write a description file for every symbol documented in the index.

    Groups and namespaces are skipped, as are records with a parent that is
    not a group.

    :param index: The fully resolved index dictionary.
    :param lang: The language domain prefix, like "c" or "cpp".
    :param symbol_dir: Path of the existing directory to write files to.
    :param force: If true, overwrite files that already exist.
    :raises FileExistsError: if a file exists and ``force`` is false.
    """

    for record in index.values():
        if record["kind"] in ["group", "namespace"]:
            continue

        if "parent" in record and index[record["parent"]]["kind"] != "group":
            continue

        name = record["name"]
        filename = os.path.join(symbol_dir, symbol_filename("%s.rst" % name))
        if not force and os.path.exists(filename):
            raise FileExistsError("File already exists: '%s'" % filename)

        # Always write UTF-8 rather than the locale encoding, since that is
        # what Sphinx reads source files as by default
        with open(filename, "w", encoding="utf-8") as rst:
            rst.write(heading(local_name(name), 3))
            rst.write(document_markup(index, lang, record))
557
558
def emit_groups(index, output_dir, symbol_dir_name, force):
    """
    Write a description file for every group documented in the index.

    Each file has the group title as a heading, the group descriptions, and
    a toctree that lists child groups first, followed by the symbol
    documents in sorted order.

    :param index: The fully resolved index dictionary.
    :param output_dir: Path of the existing directory to write files to.
    :param symbol_dir_name: Name of the symbol subdirectory, used as the
    prefix of symbol paths in the toctree.
    :param force: If true, overwrite files that already exist.
    :raises FileExistsError: if a file exists and ``force`` is false.
    """

    for record in index.values():
        if record["kind"] != "group":
            continue

        name = record["name"]
        filename = os.path.join(output_dir, "%s.rst" % name)
        if not force and os.path.exists(filename):
            raise FileExistsError("File already exists: '%s'" % filename)

        # Always write UTF-8 rather than the locale encoding, since that is
        # what Sphinx reads source files as by default
        with open(filename, "w", encoding="utf-8") as rst:
            rst.write(heading(record["title"], 2))

            # Get all child group and symbol names
            group_names = []
            symbol_names = []
            for child_id in record["children"]:
                child = index[child_id]
                if child["kind"] == "group":
                    group_names.append(child["name"])
                else:
                    symbol_names.append(child["name"])

            # Emit description (document body)
            rst.write(record["briefdescription"] + "\n\n")
            rst.write(record["detaileddescription"] + "\n\n")

            # Emit TOC
            rst.write(".. toctree::\n")

            # Emit groups at the top of the TOC
            for group_name in group_names:
                rst.write("\n" + indent(group_name, 1))

            # Emit symbols in sorted order
            for symbol_name in sorted(symbol_names):
                path = "/".join(
                    [symbol_dir_name, symbol_filename(symbol_name)]
                )
                rst.write("\n" + indent(path, 1))

            rst.write("\n")
603
604
def run(index_xml_path, output_dir, symbol_dir_name, language, force):
    """
    Write a directory of Sphinx files from a Doxygen XML directory.

    :param index_xml_path: Path to the index XML file.  Definition documents
    are loaded from the same directory.
    :param output_dir: Directory to write group documents to.
    :param symbol_dir_name: Name of the subdirectory of ``output_dir`` to
    write symbol documents to, which is created if necessary.
    :param language: The language domain prefix, like "c" or "cpp".
    :param force: If true, overwrite files that already exist.
    """

    # Build skeleton index from the index document
    xml_dir = os.path.dirname(index_xml_path)
    index = load_index(index_xml_path)

    # Load the definition document of every record that refers to one
    definition_docs = [
        xml.etree.ElementTree.parse(
            os.path.join(xml_dir, record["xml_filename"])
        )
        for record in index.values()
        if "xml_filename" in record
    ]

    # First pass: resolve the structure of the index
    for definition_doc in definition_docs:
        resolve_index(index, definition_doc)

    # Second pass: read the documentation itself
    for definition_doc in definition_docs:
        read_definition_doc(index, language, definition_doc)

    # Emit output files
    symbol_dir = os.path.join(output_dir, symbol_dir_name)
    os.makedirs(symbol_dir, exist_ok=True)
    emit_symbols(index, language, symbol_dir, force)
    emit_groups(index, output_dir, symbol_dir_name, force)
632
633
if __name__ == "__main__":
    # Command line entry point.  Argument names match the parameters of
    # run(), since the parsed arguments are passed on as keyword arguments.
    ap = argparse.ArgumentParser(
        # The first positional argument is parsed as the index file itself,
        # not as a directory, so say so in the usage message
        usage="%(prog)s [OPTION]... INDEX_XML OUTPUT_DIR",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    ap.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="overwrite files",
    )

    ap.add_argument(
        "-l",
        "--language",
        default="c",
        choices=["c", "cpp"],
        help="language domain for output",
    )

    ap.add_argument(
        "-s",
        "--symbol-dir-name",
        default="symbols",
        help="name for subdirectory of symbol documentation files",
    )

    ap.add_argument("index_xml_path", help="path to index.xml from Doxygen")
    ap.add_argument("output_dir", help="output directory")

    run(**vars(ap.parse_args(sys.argv[1:])))
667