#!/usr/bin/env python
#===- gen_std.py -  ------------------------------------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

"""gen_std.py is a tool to generate a lookup table (from qualified names to
include headers) for C/C++ Standard Library symbols by parsing archived HTML
files from cppreference.

The generated files are located in clang/include/Tooling/Inclusions.

Caveats and FIXMEs:
  - only symbols in "std" and in the sub-namespaces listed in main() (chrono,
    filesystem, pmr, regex_constants, this_thread) are added; std::placeholders
    and std literal operators are not indexed.
  - symbols with multiple variants or defined in multiple headers aren't added,
    e.g. std::move, std::swap

Usage:
  1. Install BeautifulSoup dependency, see instruction:
       https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
  2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
       https://en.cppreference.com/w/Cppreference:Archives
  3. Unzip the zip file from step 2 (e.g., to a "cppreference" directory). You should
     get a "cppreference/reference" directory.
  4. Run the command:
       // Generate C++ symbols
       python3 gen_std.py -cppreference cppreference/reference -symbols=cpp > StdSymbolMap.inc
       // Generate C++ removed symbols
       python3 gen_std.py -cppreference cppreference/reference -symbols=cpp_removed > RemovedSymbolMap.inc
       // Generate C symbols
       python3 gen_std.py -cppreference cppreference/reference -symbols=c > CSymbolMap.inc
"""


import cppreference_parser
import argparse
import datetime
import os
import sys

# Header emitted at the top of every generated .inc file. The two %s slots are
# filled with the upper-cased -symbols value and the cppreference snapshot date.
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// This file was generated automatically by
// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""


def ParseArg():
    """Parse the command line and return the argparse namespace.

    Both options are mandatory. -symbols is restricted to the three modes
    main() knows how to handle, so an unsupported value is rejected by argparse
    with a usage message instead of failing later inside main().
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    parser.add_argument('-symbols',
                        choices=['cpp', 'c', 'cpp_removed'],
                        help='Generate c or cpp (removed) symbols. One of {cpp, c, cpp_removed}.',
                        required=True)
    return parser.parse_args()


def main():
    """Print the SYMBOL(...) table for the requested symbol set to stdout.

    Symbols with exactly one header are printed; symbols with zero or several
    headers are reported on stderr and skipped. Exits with an error message if
    the expected cppreference directory is missing.
    """
    args = ParseArg()
    if args.symbols == 'cpp':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols can't be
            # accessed by std::<symbol_name>.
            # FIXME: index std::placeholders symbols, placeholders.html page is
            # different (which contains one entry for _1, _2, ..., _N), we need special
            # handling.
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "regex_constants.html", "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
        ]
    elif args.symbols == 'cpp_removed':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [(symbol_index_root, "zombie_names.html", "std::")]
    elif args.symbols == 'c':
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]
    else:
        # Unreachable via ParseArg (argparse enforces the choices); kept so the
        # variables used below can never be unbound.
        sys.exit("Unknown -symbols value: %s" % args.symbols)

    if not os.path.exists(symbol_index_root):
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modified time of page_root's index.html as the version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
    print(CODE_PREFIX % (args.symbols.upper(), cppreference_modified_date))
    for symbol in symbols:
        if len(symbol.headers) == 1:
            # SYMBOL(unqualified_name, namespace, header)
            print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                          symbol.headers[0]))
        elif len(symbol.headers) == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
                symbol.name, ', '.join(symbol.headers)))


if __name__ == '__main__':
    main()