#!/usr/bin/env python3

# Copyright 2020 David Robillard <d@drobilla.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
Write Sphinx markup from Doxygen XML.

Takes the path to the index.xml generated by Doxygen (definition files are
read from the same directory), and emits a directory with a reStructuredText
file for every documented group.
"""

import argparse
import os
import sys
import textwrap
import xml.etree.ElementTree

__author__ = "David Robillard"
__date__ = "2020-11-18"
__email__ = "d@drobilla.net"
__license__ = "ISC"
__version__ = __date__.replace("-", ".")


def load_index(index_path):
    """
    Load the index from XML.

    :param index_path: Path to the Doxygen ``index.xml`` file.
    :returns: A dictionary from ID to skeleton records with basic information
      for every documented entity.  Some records have an ``xml_filename`` key
      with the filename of a definition file.  These files will be loaded later
      to flesh out the records in the index.
    """

    root = xml.etree.ElementTree.parse(index_path).getroot()
    index = {}

    for compound in root:
        compound_id = compound.get("refid")
        compound_kind = compound.get("kind")
        compound_name = compound.find("name").text
        if compound_kind in ["dir", "file", "page"]:
            continue

        # Add record for compound (compounds appear only once in the index)
        assert compound_id not in index
        index[compound_id] = {
            "kind": compound_kind,
            "name": compound_name,
            "xml_filename": compound_id + ".xml",
            "children": [],
        }

        # Members of a namespace are recorded with their qualified name
        name_prefix = (
            ("%s::" % compound_name) if compound_kind == "namespace" else ""
        )

        for child in compound.findall("member"):
            child_id = child.get("refid")
            child_kind = child.get("kind")
            if child_id in index:
                # Only groups may list a member that was already recorded
                assert compound_kind == "group"
                continue

            # Everything has a kind and a name
            child_record = {
                "kind": child_kind,
                "name": name_prefix + child.find("name").text,
            }

            if child_kind == "enum":
                # Enums are not compounds, but we want to resolve the parent of
                # their values so they are not written as top level documents
                child_record["children"] = []

            if child_kind == "enumvalue":
                # Remove namespace prefix
                child_record["name"] = child.find("name").text

            index[child_id] = child_record

    return index


def resolve_index(index, root):
    """
    Walk a definition document and extend the index for linking.

    This does two things: sets the "parent" and "children" fields of all
    applicable records, and sets the "strong" field of enums so that the
    correct Sphinx role can be used when referring to them.

    Members found in private sections are removed from the index.

    :param index: The index to update in place.
    :param root: A parsed definition document containing a ``compounddef``.
    """

    def add_child(index, parent_id, child_id):
        """Link a child record to its parent record in the index."""
        parent = index[parent_id]
        child = index[child_id]

        if child["kind"] == "enumvalue":
            assert parent["kind"] == "enum"
            assert "parent" not in child or child["parent"] == parent_id
            child["parent"] = parent_id

        elif parent["kind"] in ["class", "struct", "union"]:
            # Only members of record types get a "parent" (used for scoping)
            assert "parent" not in child or child["parent"] == parent_id
            child["parent"] = parent_id

        if child_id not in parent["children"]:
            parent["children"].append(child_id)

    compound = root.find("compounddef")
    compound_id = compound.get("id")
    compound_kind = compound.get("kind")

    if compound_kind == "group":
        for subgroup in compound.findall("innergroup"):
            add_child(index, compound_id, subgroup.get("refid"))

        for klass in compound.findall("innerclass"):
            add_child(index, compound_id, klass.get("refid"))

    for section in compound.findall("sectiondef"):
        if section.get("kind").startswith("private"):
            # Private members are not documented, drop them from the index
            for member in section.findall("memberdef"):
                if member.get("id") in index:
                    del index[member.get("id")]
        else:
            for member in section.findall("memberdef"):
                member_id = member.get("id")
                add_child(index, compound_id, member_id)

                if member.get("kind") == "enum":
                    index[member_id]["strong"] = member.get("strong") == "yes"
                    for value in member.findall("enumvalue"):
                        add_child(index, member_id, value.get("id"))


def sphinx_role(record, lang):
    """
    Return the Sphinx role used for a record.

    This is used for the description directive like ".. c:function::", and
    links like ":c:func:`foo`.

    :param record: An index record with at least a "kind" key.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :raises RuntimeError: If the kind of the record has no known role.
    """

    kind = record["kind"]

    if kind in ["class", "function", "namespace", "struct", "union"]:
        return lang + ":" + kind

    if kind == "define":
        # Macros are always described in the C domain
        return "c:macro"

    if kind == "enum":
        # An enum that was never resolved is treated as a plain enum
        strong = record.get("strong", False)
        return lang + (":enum-class" if strong else ":enum")

    if kind == "typedef":
        return lang + ":type"

    if kind == "enumvalue":
        return lang + ":enumerator"

    if kind == "variable":
        return lang + (":member" if "parent" in record else ":var")

    raise RuntimeError("No known role for kind '%s'" % kind)


def child_identifier(lang, parent_name, child_name):
    """
    Return the identifier for an enum value or struct member.

    Sphinx, for some reason, uses a different syntax for this in C and C++.
    """

    separator = "::" if lang == "cpp" else "."

    return "%s%s%s" % (parent_name, separator, child_name)


def link_markup(index, lang, refid):
    """
    Return a Sphinx link for a Doxygen reference.

    :param index: The resolved index.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param refid: The Doxygen ID of the link target, which must be in the
      index.
    :raises RuntimeError: If the target kind can not be linked to.
    """

    record = index[refid]
    kind, name = record["kind"], record["name"]
    role = sphinx_role(record, lang)

    if kind in ["class", "enum", "struct", "typedef", "union"]:
        return ":%s:`%s`" % (role, name)

    if kind == "function":
        return ":%s:func:`%s`" % (lang, name)

    if kind == "enumvalue":
        parent_name = index[record["parent"]]["name"]
        return ":%s:`%s`" % (role, child_identifier(lang, parent_name, name))

    if kind == "variable":
        if "parent" not in record:
            return ":%s:var:`%s`" % (lang, name)

        parent_name = index[record["parent"]]["name"]
        return ":%s:`%s`" % (role, child_identifier(lang, parent_name, name))

    if kind == "define":
        # Use the role from sphinx_role(), which is always in the C domain,
        # so that the link matches the directive the macro is described with
        return ":%s:`%s`" % (role, name)

    raise RuntimeError("Unknown link target kind: %s" % kind)


def indent(markup, depth):
    """
    Indent markup to a depth level.

    Like textwrap.indent() but takes an integer and works in reST indentation
    levels for clarity.
    """

    return textwrap.indent(markup, " " * depth)


def heading(text, level):
    """
    Return a ReST heading at a given level.

    Follows the style in the Python documentation guide, see
    <https://devguide.python.org/documenting/#sections>.

    :param text: The heading text.
    :param level: The heading level from 1 (top) to 6 (deepest).  Levels 1
      and 2 are written with an overline as well as an underline.
    """

    assert 1 <= level <= 6

    chars = ("#", "*", "=", "-", "^", '"')
    line = chars[level - 1] * len(text)  # Levels are 1-based
    overline = line + "\n" if level < 3 else ""

    return "%s%s\n%s\n\n" % (overline, text, line)


def dox_to_rst(index, lang, node):
    """
    Convert documentation commands (docCmdGroup) to Sphinx markup.

    This is used to convert the content of descriptions in the documentation.
    It recursively parses all children tags and raises a RuntimeError if any
    unknown tag is encountered.

    :param index: The resolved index, used to generate links.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param node: The XML element to convert.
    """

    def field_value(markup):
        """Return a value for a field as a single line or indented block."""
        if "\n" in markup.strip():
            return "\n" + indent(markup, 1)

        return " " + markup.strip()

    if node.tag == "lsquo":
        return "‘"

    if node.tag == "rsquo":
        return "’"

    if node.tag == "computeroutput":
        assert len(node) == 0
        return "``%s``" % node.text

    if node.tag == "itemizedlist":
        markup = ""
        for item in node.findall("listitem"):
            assert len(item) == 1
            markup += "\n- %s" % dox_to_rst(index, lang, item[0])

        return markup

    if node.tag == "para":
        markup = node.text if node.text is not None else ""
        for child in node:
            markup += dox_to_rst(index, lang, child)
            markup += child.tail if child.tail is not None else ""

        return markup.strip() + "\n\n"

    if node.tag == "parameterlist":
        markup = ""
        for item in node.findall("parameteritem"):
            name = item.find("parameternamelist/parametername")
            description = item.find("parameterdescription")
            assert len(description) == 1
            markup += "\n\n:param %s:%s" % (
                name.text,
                field_value(dox_to_rst(index, lang, description[0])),
            )

        return markup + "\n"

    if node.tag == "programlisting":
        return "\n.. code-block:: %s\n\n%s" % (
            lang,
            indent(plain_text(node), 1),
        )

    if node.tag == "ref":
        refid = node.get("refid")
        if refid not in index:
            # Fall back to the plain link text when the target is unknown
            sys.stderr.write("warning: Unresolved link: %s\n" % refid)
            return node.text

        assert len(node) == 0
        link = link_markup(index, lang, refid)
        assert len(link) > 0
        return link

    if node.tag == "simplesect":
        assert len(node) == 1
        sect_kind = node.get("kind")

        if sect_kind == "return":
            value = field_value(dox_to_rst(index, lang, node[0]))
            return "\n:returns:" + value + "\n"

        if sect_kind == "see":
            return dox_to_rst(index, lang, node[0])

        raise RuntimeError("Unknown simplesect kind: %s" % sect_kind)

    if node.tag == "ulink":
        return "`%s <%s>`_" % (node.text, node.get("url"))

    raise RuntimeError("Unknown documentation command: %s" % node.tag)


def description_markup(index, lang, node):
    """
    Return the markup for a brief or detailed description.

    :param index: The resolved index, used to generate links.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param node: A ``briefdescription`` or ``detaileddescription`` element.
    """

    assert node.tag == "briefdescription" or node.tag == "detaileddescription"
    assert not (node.tag == "briefdescription" and len(node) > 1)

    # An empty element has no text at all (None), which is fine
    assert len((node.text or "").strip()) == 0

    return "".join([dox_to_rst(index, lang, child) for child in node]).strip()


def set_descriptions(index, lang, definition, record):
    """Set a record's brief/detailed descriptions from the XML definition."""

    for tag in ["briefdescription", "detaileddescription"]:
        node = definition.find(tag)
        if node is not None:
            record[tag] = description_markup(index, lang, node)


def set_template_params(node, record):
    """
    Set a record's template_params from the XML definition.

    The record is left untouched if the definition has no template parameter
    list.
    """

    template_param_list = node.find("templateparamlist")
    if template_param_list is None:
        return

    params = []
    for param in template_param_list.findall("param"):
        type_text = plain_text(param.find("type"))
        declname = param.find("declname")
        if declname is not None:
            # Value parameter
            params.append("%s %s" % (type_text, plain_text(declname)))
        else:
            # Type parameter
            params.append(type_text)

    record["template_params"] = ", ".join(params)


def plain_text(node):
    """
    Return the plain text of a node with all tags ignored.

    This is needed where Doxygen may include refs but Sphinx needs plain text
    because it parses things itself to generate links.
    """

    if node.tag == "sp":
        # Doxygen writes spaces in listings as <sp/> elements
        markup = " "
    elif node.text is not None:
        markup = node.text
    else:
        markup = ""

    for child in node:
        markup += plain_text(child)
        markup += child.tail if child.tail is not None else ""

    return markup


def local_name(name):
    """Return a name with all namespace prefixes stripped."""

    return name[name.rindex("::") + 2 :] if "::" in name else name


def read_definition_doc(index, lang, root):
    """
    Walk a definition document and update described records in the index.

    :param index: The resolved index to update in place.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param root: A parsed definition document containing a ``compounddef``.
    """

    # Set descriptions for the compound itself
    compound = root.find("compounddef")
    compound_record = index[compound.get("id")]
    set_descriptions(index, lang, compound, compound_record)
    set_template_params(compound, compound_record)

    title = compound.find("title")
    if title is not None:
        compound_record["title"] = title.text.strip()

    # Set documentation for all children
    for section in compound.findall("sectiondef"):
        if section.get("kind").startswith("private"):
            continue

        for member in section.findall("memberdef"):
            kind = member.get("kind")
            record = index[member.get("id")]
            set_descriptions(index, lang, member, record)
            set_template_params(member, record)

            if compound.get("kind") in ["class", "struct", "union"]:
                assert kind in ["function", "typedef", "variable"]
                record["type"] = plain_text(member.find("type"))

            if kind == "enum":
                for value in member.findall("enumvalue"):
                    set_descriptions(
                        index, lang, value, index[value.get("id")]
                    )

            elif kind == "function":
                record["prototype"] = "%s %s%s" % (
                    plain_text(member.find("type")),
                    member.find("name").text,
                    member.find("argsstring").text,
                )

            elif kind == "typedef":
                name = local_name(record["name"])
                args_text = member.find("argsstring").text
                target_text = plain_text(member.find("type"))
                if args_text is not None:  # Function pointer
                    assert target_text[-2:] == "(*" and args_text[0] == ")"
                    record["type"] = target_text + args_text
                    record["definition"] = target_text + name + args_text
                else:  # Normal named typedef
                    assert target_text is not None
                    record["type"] = target_text
                    if member.find("definition").text.startswith("using"):
                        record["definition"] = "%s = %s" % (
                            name,
                            target_text,
                        )
                    else:
                        record["definition"] = "%s %s" % (
                            target_text,
                            name,
                        )

            elif kind == "variable":
                record["definition"] = member.find("definition").text


def declaration_string(record):
    """
    Return the string that describes a declaration.

    This is what follows the directive, and is in C/C++ syntax, except without
    keywords like "typedef" and "using" as expected by Sphinx.  For example,
    "struct ThingImpl Thing" or "void run(int value)".
    """

    kind = record["kind"]
    result = ""

    if "template_params" in record:
        result = "template <%s> " % record["template_params"]

    if kind == "function":
        result += record["prototype"]
    elif kind == "typedef":
        result += record["definition"]
    elif kind == "variable":
        if "parent" in record:
            # Members are declared by their local name within the parent
            result += "%s %s" % (record["type"], local_name(record["name"]))
        else:
            result += record["definition"]
    elif "type" in record:
        result += "%s %s" % (record["type"], local_name(record["name"]))
    else:
        result += local_name(record["name"])

    return result


def document_markup(index, lang, record):
    """
    Return the complete document that describes some documented entity.

    :param index: The resolved index, used to look up children.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param record: The fully described record to document.
    """

    kind = record["kind"]
    role = sphinx_role(record, lang)
    name = record["name"]
    markup = ""

    if name != local_name(name):
        # Set the scope for qualified names
        markup += ".. cpp:namespace:: %s\n\n" % name[0 : name.rindex("::")]

    # Write top-level directive
    markup += ".. %s:: %s\n" % (role, declaration_string(record))

    # Write main description blurb
    markup += "\n" + indent(record["briefdescription"] + "\n", 1)
    if len(record["detaileddescription"]) > 0:
        markup += "\n" + indent(record["detaileddescription"], 1) + "\n"

    assert (
        kind in ["class", "enum", "namespace", "struct", "union"]
        or "children" not in record
    )

    # Sphinx C++ namespaces work by setting a scope, they have no content
    child_indent = 0 if kind == "namespace" else 1

    # Write inline children if applicable
    markup += "\n" if "children" in record else ""
    for child_id in record.get("children", []):
        child_record = index[child_id]
        child_role = sphinx_role(child_record, lang)

        child_header = ".. %s:: %s\n\n" % (
            child_role,
            declaration_string(child_record),
        )

        markup += "\n"
        markup += indent(child_header, child_indent)
        markup += indent(child_record["briefdescription"], child_indent + 1)
        markup += indent(child_record["detaileddescription"], child_indent + 1)

    return markup


def symbol_filename(name):
    """Adapt the name of a symbol to be suitable for use as a filename."""

    return name.replace("::", "__")


def emit_groups(index, lang, output_dir, force):
    """
    Write a description file for every group documented in the index.

    :param index: The fully described index.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param output_dir: The existing directory to write ``<group>.rst`` to.
    :param force: If true, overwrite files that already exist.
    :raises FileExistsError: If a file exists and ``force`` is false.
    """

    for record in index.values():
        if record["kind"] != "group":
            continue

        filename = os.path.join(output_dir, "%s.rst" % record["name"])
        if not force and os.path.exists(filename):
            raise FileExistsError("File already exists: '%s'" % filename)

        # The markup may contain non-ASCII characters (typographic quotes),
        # so do not depend on the encoding of the current locale
        with open(filename, "w", encoding="utf-8") as rst:
            rst.write(heading(record["title"], 1))

            # Get all child groups and symbols, keyed by name
            child_groups = {}
            child_symbols = {}
            for child_id in record["children"]:
                child = index[child_id]
                if child["kind"] == "group":
                    child_groups[child["name"]] = child
                else:
                    child_symbols[child["name"]] = child

            # Emit description (document body)
            if len(record["briefdescription"]) > 0:
                rst.write(record["briefdescription"] + "\n\n")
            if len(record["detaileddescription"]) > 0:
                rst.write(record["detaileddescription"] + "\n\n")

            if len(child_groups) > 0:
                # Emit TOC for child groups
                rst.write(".. toctree::\n\n")
                for group in child_groups.values():
                    rst.write(indent(group["name"], 1) + "\n")

            # Emit symbols in the order they were added to the group
            for symbol in child_symbols.values():
                rst.write("\n")
                rst.write(document_markup(index, lang, symbol))
                rst.write("\n")


def run(index_xml_path, output_dir, language, force):
    """
    Write a directory of Sphinx files from a Doxygen XML directory.

    :param index_xml_path: Path to the ``index.xml`` written by Doxygen.
      Definition files are loaded from the directory that contains it.
    :param output_dir: The output directory, created if it does not exist.
    :param language: The Sphinx language domain, "c" or "cpp".
    :param force: If true, overwrite files that already exist.
    """

    # Build skeleton index from index.xml
    xml_dir = os.path.dirname(index_xml_path)
    index = load_index(index_xml_path)

    # Load all definition documents
    definition_docs = []
    for record in index.values():
        if "xml_filename" in record:
            xml_path = os.path.join(xml_dir, record["xml_filename"])
            definition_docs.append(xml.etree.ElementTree.parse(xml_path))

    # Do an initial pass of the definition documents to resolve the index
    for doc in definition_docs:
        resolve_index(index, doc)

    # Finally read the documentation from definition documents
    for doc in definition_docs:
        read_definition_doc(index, language, doc)

    # Create output directory (an existing directory is fine, but any other
    # failure is reported instead of being silently ignored)
    os.makedirs(output_dir, exist_ok=True)

    # Emit output files
    emit_groups(index, language, output_dir, force)


if __name__ == "__main__":
    ap = argparse.ArgumentParser(
        usage="%(prog)s [OPTION]... INDEX_XML OUTPUT_DIR",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    ap.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="overwrite files",
    )

    ap.add_argument(
        "-l",
        "--language",
        default="c",
        choices=["c", "cpp"],
        help="language domain for output",
    )

    ap.add_argument("index_xml_path", help="path to index.xml from Doxygen")
    ap.add_argument("output_dir", help="output directory")

    # Argument names match the parameters of run()
    run(**vars(ap.parse_args(sys.argv[1:])))