#!/usr/bin/env python3

# Copyright 2020 David Robillard <d@drobilla.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
Write Sphinx markup from Doxygen XML.

Takes the path to the index.xml generated by Doxygen (definition files are
read from the same directory), and emits a directory with a reStructuredText
file for every documented symbol.
"""

import argparse
import os
import sys
import textwrap
import xml.etree.ElementTree

__author__ = "David Robillard"
__date__ = "2020-11-18"
__email__ = "d@drobilla.net"
__license__ = "ISC"
__version__ = __date__.replace("-", ".")


def load_index(index_path):
    """
    Load the index from XML.

    :param index_path: Path to the Doxygen ``index.xml`` file.

    :returns: A dictionary from ID to skeleton records with basic information
      for every documented entity.  Some records have an ``xml_filename`` key
      with the filename of a definition file.  These files will be loaded later
      to flesh out the records in the index.
    """

    root = xml.etree.ElementTree.parse(index_path).getroot()
    index = {}

    for compound in root:
        compound_id = compound.get("refid")
        compound_kind = compound.get("kind")
        compound_name = compound.find("name").text
        if compound_kind in ["dir", "file", "page"]:
            continue

        # Add record for compound (compounds appear only once in the index)
        assert compound_id not in index
        index[compound_id] = {
            "kind": compound_kind,
            "name": compound_name,
            "xml_filename": compound_id + ".xml",
            "children": [],
        }

        # Members of a namespace are recorded under their qualified name
        name_prefix = (
            ("%s::" % compound_name) if compound_kind == "namespace" else ""
        )

        for child in compound.findall("member"):
            if child.get("refid") in index:
                # Only groups are expected to repeat an already-seen member
                assert compound_kind == "group"
                continue

            # Everything has a kind and a name
            child_record = {
                "kind": child.get("kind"),
                "name": name_prefix + child.find("name").text,
            }

            if child.get("kind") == "enum":
                # Enums are not compounds, but we want to resolve the parent of
                # their values so they are not written as top level documents
                child_record["children"] = []

            if child.get("kind") == "enumvalue":
                # Remove namespace prefix
                child_record["name"] = child.find("name").text

            index[child.get("refid")] = child_record

    return index


def resolve_index(index, root):
    """
    Walk a definition document and extend the index for linking.

    This does two things: sets the "parent" and "children" fields of all
    applicable records, and sets the "strong" field of enums so that the
    correct Sphinx role can be used when referring to them.

    Members found in private sections are removed from the index.

    :param index: The index dictionary, which is modified in place.
    :param root: A parsed definition document containing a ``compounddef``.
    """

    def add_child(index, parent_id, child_id):
        """Link a child record to its parent record in the index."""
        parent = index[parent_id]
        child = index[child_id]

        if child["kind"] == "enumvalue":
            assert parent["kind"] == "enum"
            assert "parent" not in child or child["parent"] == parent_id
            child["parent"] = parent_id

        else:
            # Only members of class-like compounds get a "parent" field
            if parent["kind"] in ["class", "struct", "union"]:
                assert "parent" not in child or child["parent"] == parent_id
                child["parent"] = parent_id

        if child_id not in parent["children"]:
            parent["children"].append(child_id)

    compound = root.find("compounddef")
    compound_id = compound.get("id")
    compound_kind = compound.get("kind")

    if compound_kind == "group":
        for subgroup in compound.findall("innergroup"):
            add_child(index, compound_id, subgroup.get("refid"))

    for klass in compound.findall("innerclass"):
        add_child(index, compound_id, klass.get("refid"))

    for section in compound.findall("sectiondef"):
        if section.get("kind").startswith("private"):
            # Private members are dropped so they are never documented
            for member in section.findall("memberdef"):
                if member.get("id") in index:
                    del index[member.get("id")]
        else:
            for member in section.findall("memberdef"):
                member_id = member.get("id")
                add_child(index, compound_id, member_id)

                if member.get("kind") == "enum":
                    index[member_id]["strong"] = member.get("strong") == "yes"
                    for value in member.findall("enumvalue"):
                        add_child(index, member_id, value.get("id"))


def sphinx_role(record, lang):
    """
    Return the Sphinx role used for a record.

    This is used for the description directive like ".. c:function::", and
    links like ":c:func:`foo`".

    :param record: An index record with at least a "kind" field.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :raises RuntimeError: If the record's kind has no known role.
    """

    kind = record["kind"]

    if kind in ["class", "function", "namespace", "struct", "union"]:
        return lang + ":" + kind

    if kind == "define":
        return "c:macro"

    if kind == "enum":
        # "strong" is only set once the enum's definition has been resolved,
        # so treat a missing field as a plain (weak) enum
        return lang + (":enum-class" if record.get("strong") else ":enum")

    if kind == "typedef":
        return lang + ":type"

    if kind == "enumvalue":
        return lang + ":enumerator"

    if kind == "variable":
        return lang + (":member" if "parent" in record else ":var")

    raise RuntimeError("No known role for kind '%s'" % kind)


def child_identifier(lang, parent_name, child_name):
    """
    Return the identifier for an enum value or struct member.

    Sphinx, for some reason, uses a different syntax for this in C and C++.
    """

    separator = "::" if lang == "cpp" else "."

    return "%s%s%s" % (parent_name, separator, child_name)


def link_markup(index, lang, refid):
    """
    Return a Sphinx link for a Doxygen reference.

    :param index: The index dictionary, which must contain ``refid``.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param refid: The Doxygen ID of the link target.
    :raises RuntimeError: If the target kind can not be linked to.
    """

    record = index[refid]
    kind, name = record["kind"], record["name"]
    role = sphinx_role(record, lang)

    # "define" is included because sphinx_role() maps it to "c:macro"
    if kind in ["class", "define", "enum", "struct", "typedef", "union"]:
        return ":%s:`%s`" % (role, name)

    if kind == "function":
        return ":%s:func:`%s`" % (lang, name)

    if kind == "enumvalue":
        parent_name = index[record["parent"]]["name"]
        return ":%s:`%s`" % (role, child_identifier(lang, parent_name, name))

    if kind == "variable":
        if "parent" not in record:
            return ":%s:var:`%s`" % (lang, name)

        parent_name = index[record["parent"]]["name"]
        return ":%s:`%s`" % (role, child_identifier(lang, parent_name, name))

    raise RuntimeError("Unknown link target kind: %s" % kind)


def indent(markup, depth):
    """
    Indent markup to a depth level.

    Like textwrap.indent() but takes an integer and works in reST indentation
    levels for clarity.
    """

    # NOTE(review): one indentation level is a single space here; confirm this
    # is the intended width (reST conventionally uses three spaces).
    return textwrap.indent(markup, " " * depth)


def heading(text, level):
    """
    Return a ReST heading at a given level.

    Follows the style in the Python documentation guide, see
    <https://devguide.python.org/documenting/#sections>.

    :param text: The heading text.
    :param level: The heading level from 1 (outermost) to 6 (innermost).
    """

    assert 1 <= level <= 6

    # Levels are 1-based, so level 1 is "#" and level 6 is '"'
    chars = ("#", "*", "=", "-", "^", '"')
    line = chars[level - 1] * len(text)

    # Only the two outermost levels ("#" and "*") have an overline
    return "%s\n%s\n%s\n\n" % (line if level < 3 else "", text, line)


def dox_to_rst(index, lang, node):
    """
    Convert documentation commands (docCmdGroup) to Sphinx markup.

    This is used to convert the content of descriptions in the documentation.
    It recursively parses all children tags and raises a RuntimeError if any
    unknown tag is encountered.

    :param index: The index dictionary used to resolve references.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param node: The XML element to convert.
    """

    def field_value(markup):
        """Return a value for a field as a single line or indented block."""
        if "\n" in markup.strip():
            return "\n" + indent(markup, 1)

        return " " + markup.strip()

    if node.tag == "computeroutput":
        assert len(node) == 0
        return "``%s``" % node.text

    if node.tag == "itemizedlist":
        markup = ""
        for item in node.findall("listitem"):
            assert len(item) == 1
            markup += "\n- %s" % dox_to_rst(index, lang, item[0])

        return markup

    if node.tag == "para":
        markup = node.text if node.text is not None else ""
        for child in node:
            markup += dox_to_rst(index, lang, child)
            markup += child.tail if child.tail is not None else ""

        return markup.strip() + "\n\n"

    if node.tag == "parameterlist":
        markup = ""
        for item in node.findall("parameteritem"):
            name = item.find("parameternamelist/parametername")
            description = item.find("parameterdescription")
            assert len(description) == 1
            markup += "\n\n:param %s:%s" % (
                name.text,
                field_value(dox_to_rst(index, lang, description[0])),
            )

        return markup + "\n"

    if node.tag == "programlisting":
        return "\n.. code-block:: %s\n\n%s" % (
            lang,
            indent(plain_text(node), 1),
        )

    if node.tag == "ref":
        refid = node.get("refid")
        if refid not in index:
            sys.stderr.write("warning: Unresolved link: %s\n" % refid)
            # Fall back to the plain link text, which may be absent
            return node.text or ""

        assert len(node) == 0
        link = link_markup(index, lang, refid)
        assert len(link) > 0
        return link

    if node.tag == "simplesect":
        assert len(node) == 1

        if node.get("kind") == "return":
            return "\n:returns:" + field_value(
                dox_to_rst(index, lang, node[0])
            )

        if node.get("kind") == "see":
            return dox_to_rst(index, lang, node[0])

        raise RuntimeError("Unknown simplesect kind: %s" % node.get("kind"))

    if node.tag == "ulink":
        return "`%s <%s>`_" % (node.text, node.get("url"))

    raise RuntimeError("Unknown documentation command: %s" % node.tag)


def description_markup(index, lang, node):
    """
    Return the markup for a brief or detailed description.

    :param index: The index dictionary used to resolve references.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param node: A ``briefdescription`` or ``detaileddescription`` element.
    """

    assert node.tag == "briefdescription" or node.tag == "detaileddescription"
    assert not (node.tag == "briefdescription" and len(node) > 1)

    # An empty element has no text at all, which is as good as blank text
    assert len((node.text or "").strip()) == 0

    return "".join([dox_to_rst(index, lang, child) for child in node])


def set_descriptions(index, lang, definition, record):
    """Set a record's brief/detailed descriptions from the XML definition."""

    for tag in ["briefdescription", "detaileddescription"]:
        node = definition.find(tag)
        if node is not None:
            record[tag] = description_markup(index, lang, node)


def set_template_params(node, record):
    """
    Set a record's template_params from the XML definition.

    Nothing is set if the definition has no ``templateparamlist``.
    """

    template_param_list = node.find("templateparamlist")
    if template_param_list is None:
        return

    params = []
    for param in template_param_list.findall("param"):
        type_text = plain_text(param.find("type"))
        if param.find("declname") is not None:
            # Value parameter
            name_text = plain_text(param.find("declname"))
            params.append("%s %s" % (type_text, name_text))
        else:
            # Type parameter
            params.append(type_text)

    record["template_params"] = ", ".join(params)


def plain_text(node):
    """
    Return the plain text of a node with all tags ignored.

    This is needed where Doxygen may include refs but Sphinx needs plain text
    because it parses things itself to generate links.
    """

    if node.tag == "sp":
        markup = " "
    elif node.text is not None:
        markup = node.text
    else:
        markup = ""

    for child in node:
        markup += plain_text(child)
        markup += child.tail if child.tail is not None else ""

    return markup


def local_name(name):
    """Return a name with all namespace prefixes stripped."""

    return name[name.rindex("::") + 2 :] if "::" in name else name


def read_definition_doc(index, lang, root):
    """
    Walk a definition document and update described records in the index.

    :param index: The index dictionary, which is modified in place.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param root: A parsed definition document containing a ``compounddef``.
    """

    # Set descriptions for the compound itself
    compound = root.find("compounddef")
    compound_record = index[compound.get("id")]
    set_descriptions(index, lang, compound, compound_record)
    set_template_params(compound, compound_record)

    if compound.find("title") is not None:
        compound_record["title"] = compound.find("title").text.strip()

    # Set documentation for all children
    for section in compound.findall("sectiondef"):
        if section.get("kind").startswith("private"):
            continue

        for member in section.findall("memberdef"):
            kind = member.get("kind")
            record = index[member.get("id")]
            set_descriptions(index, lang, member, record)
            set_template_params(member, record)

            if compound.get("kind") in ["class", "struct", "union"]:
                assert kind in ["function", "typedef", "variable"]
                record["type"] = plain_text(member.find("type"))

            if kind == "enum":
                for value in member.findall("enumvalue"):
                    set_descriptions(
                        index, lang, value, index[value.get("id")]
                    )

            elif kind == "function":
                record["prototype"] = "%s %s%s" % (
                    plain_text(member.find("type")),
                    member.find("name").text,
                    member.find("argsstring").text,
                )

            elif kind == "typedef":
                name = local_name(record["name"])
                args_text = member.find("argsstring").text
                target_text = plain_text(member.find("type"))
                if args_text is not None:  # Function pointer
                    assert target_text[-2:] == "(*" and args_text[0] == ")"
                    record["type"] = target_text + args_text
                    record["definition"] = target_text + name + args_text
                else:  # Normal named typedef
                    assert target_text is not None
                    record["type"] = target_text
                    if member.find("definition").text.startswith("using"):
                        record["definition"] = "%s = %s" % (
                            name,
                            target_text,
                        )
                    else:
                        record["definition"] = "%s %s" % (
                            target_text,
                            name,
                        )


def declaration_string(record):
    """
    Return the string that describes a declaration.

    This is what follows the directive, and is in C/C++ syntax, except without
    keywords like "typedef" and "using" as expected by Sphinx.  For example,
    "struct ThingImpl Thing" or "void run(int value)".
    """

    kind = record["kind"]
    result = ""

    if "template_params" in record:
        result = "template <%s> " % record["template_params"]

    if kind == "function":
        result += record["prototype"]
    elif kind == "typedef":
        result += record["definition"]
    elif "type" in record:
        result += "%s %s" % (record["type"], local_name(record["name"]))
    else:
        result += local_name(record["name"])

    return result


def document_markup(index, lang, record):
    """
    Return the complete document that describes some documented entity.

    :param index: The index dictionary used to look up children.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param record: The index record of the entity to document.
    """

    kind = record["kind"]
    role = sphinx_role(record, lang)
    name = record["name"]
    markup = ""

    if name != local_name(name):
        markup += ".. cpp:namespace:: %s\n\n" % name[0 : name.rindex("::")]

    # Write top-level directive
    markup += ".. %s:: %s\n" % (role, declaration_string(record))

    # Write main description blurb (descriptions are only set when present in
    # the definition, so a missing one is treated as empty)
    markup += "\n"
    markup += indent(record.get("briefdescription", ""), 1)
    markup += indent(record.get("detaileddescription", ""), 1)

    assert (
        kind in ["class", "enum", "namespace", "struct", "union"]
        or "children" not in record
    )

    # Sphinx C++ namespaces work by setting a scope, they have no content
    child_indent = 0 if kind == "namespace" else 1

    # Write inline children if applicable
    markup += "\n"
    for child_id in record.get("children", []):
        child_record = index[child_id]
        child_role = sphinx_role(child_record, lang)

        child_header = ".. %s:: %s\n\n" % (
            child_role,
            declaration_string(child_record),
        )

        markup += "\n"
        markup += indent(child_header, child_indent)
        markup += indent(
            child_record.get("briefdescription", ""), child_indent + 1
        )
        markup += indent(
            child_record.get("detaileddescription", ""), child_indent + 1
        )
        markup += "\n"

    return markup


def symbol_filename(name):
    """Adapt the name of a symbol to be suitable for use as a filename."""

    return name.replace("::", "__")


def emit_symbols(index, lang, symbol_dir, force):
    """
    Write a description file for every symbol documented in the index.

    Groups, namespaces, and records whose parent is not a group are skipped.

    :param index: The fully resolved index dictionary.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param symbol_dir: The directory to write symbol files to.
    :param force: Overwrite existing files instead of raising an error.
    :raises FileExistsError: If a file exists and ``force`` is false.
    """

    for record in index.values():
        if record["kind"] in ["group", "namespace"] or (
            "parent" in record
            and index[record["parent"]]["kind"] != "group"
        ):
            continue

        name = record["name"]
        filename = os.path.join(symbol_dir, symbol_filename("%s.rst" % name))
        if not force and os.path.exists(filename):
            raise FileExistsError("File already exists: '%s'" % filename)

        # Write UTF-8 explicitly so output does not depend on the locale
        with open(filename, "w", encoding="utf-8") as rst:
            rst.write(heading(local_name(name), 3))
            rst.write(document_markup(index, lang, record))


def emit_groups(index, output_dir, symbol_dir_name, force):
    """
    Write a description file for every group documented in the index.

    :param index: The fully resolved index dictionary.
    :param output_dir: The directory to write group files to.
    :param symbol_dir_name: The name of the symbol subdirectory, used for
      toctree paths.
    :param force: Overwrite existing files instead of raising an error.
    :raises FileExistsError: If a file exists and ``force`` is false.
    """

    for record in index.values():
        if record["kind"] != "group":
            continue

        name = record["name"]
        filename = os.path.join(output_dir, "%s.rst" % name)
        if not force and os.path.exists(filename):
            raise FileExistsError("File already exists: '%s'" % filename)

        # Write UTF-8 explicitly so output does not depend on the locale
        with open(filename, "w", encoding="utf-8") as rst:
            rst.write(heading(record["title"], 2))

            # Get all child group and symbol names
            group_names = []
            symbol_names = []
            for child_id in record["children"]:
                child = index[child_id]
                if child["kind"] == "group":
                    group_names.append(child["name"])
                else:
                    symbol_names.append(child["name"])

            # Emit description (document body)
            rst.write(record.get("briefdescription", "") + "\n\n")
            rst.write(record.get("detaileddescription", "") + "\n\n")

            # Emit TOC
            rst.write(".. toctree::\n")

            # Emit groups at the top of the TOC
            for group_name in group_names:
                rst.write("\n" + indent(group_name, 1))

            # Emit symbols in sorted order
            for symbol_name in sorted(symbol_names):
                path = "/".join(
                    [symbol_dir_name, symbol_filename(symbol_name)]
                )
                rst.write("\n" + indent(path, 1))

            rst.write("\n")


def run(index_xml_path, output_dir, symbol_dir_name, language, force):
    """
    Write a directory of Sphinx files from a Doxygen XML directory.

    :param index_xml_path: Path to the Doxygen ``index.xml`` file.
    :param output_dir: The directory to write group files to.
    :param symbol_dir_name: Name of the subdirectory of ``output_dir`` to
      write symbol files to.
    :param language: The Sphinx language domain, "c" or "cpp".
    :param force: Overwrite existing files instead of raising an error.
    """

    # Build skeleton index from index.xml
    xml_dir = os.path.dirname(index_xml_path)
    index = load_index(index_xml_path)

    # Load all definition documents
    definition_docs = []
    for record in index.values():
        if "xml_filename" in record:
            xml_path = os.path.join(xml_dir, record["xml_filename"])
            definition_docs.append(xml.etree.ElementTree.parse(xml_path))

    # Do an initial pass of the definition documents to resolve the index
    for root in definition_docs:
        resolve_index(index, root)

    # Finally read the documentation from definition documents
    for root in definition_docs:
        read_definition_doc(index, language, root)

    # Emit output files
    symbol_dir = os.path.join(output_dir, symbol_dir_name)
    os.makedirs(symbol_dir, exist_ok=True)
    emit_symbols(index, language, symbol_dir, force)
    emit_groups(index, output_dir, symbol_dir_name, force)


if __name__ == "__main__":
    ap = argparse.ArgumentParser(
        usage="%(prog)s [OPTION]... INDEX_XML OUTPUT_DIR",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    ap.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="overwrite files",
    )

    ap.add_argument(
        "-l",
        "--language",
        default="c",
        choices=["c", "cpp"],
        help="language domain for output",
    )

    ap.add_argument(
        "-s",
        "--symbol-dir-name",
        default="symbols",
        help="name for subdirectory of symbol documentation files",
    )

    ap.add_argument("index_xml_path", help="path to index.xml from Doxygen")
    ap.add_argument("output_dir", help="output directory")

    # Argument destinations match the parameter names of run()
    run(**vars(ap.parse_args(sys.argv[1:])))