1#!/usr/bin/env python3
2# A tool to parse the FormatStyle struct from Format.h and update the
3# documentation in ../ClangFormatStyleOptions.rst automatically.
4# Run from the directory in which this file is located to update the docs.
6import inspect
7import os
8import re
9import sys
10from io import TextIOWrapper
11from typing import Set
# Every path below is anchored at the directory that contains this script, so
# the tool finds the clang tree regardless of how the script path was spelled.
_SCRIPT_DIR = os.path.dirname(__file__)

CLANG_DIR = os.path.join(_SCRIPT_DIR, '../..')
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Format/Format.h')
INCLUDE_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Tooling/Inclusions/IncludeStyle.h')
DOC_FILE = os.path.join(CLANG_DIR, 'docs/ClangFormatStyleOptions.rst')

# One already-reviewed plural per line; new plurals get appended to this file.
PLURALS_FILE = os.path.join(_SCRIPT_DIR, 'plurals.txt')

# Known plurals. The file is opened in 'a+' mode so that it is created when it
# does not exist yet; that mode positions the stream at the end, hence the
# rewind before reading.
plurals: Set[str] = set()
with open(PLURALS_FILE, 'a+') as f:
  f.seek(0)
  plurals.update(f.read().splitlines())
def substitute(text, tag, contents):
  """Replace the region between the START/END markers of `tag` in `text`.

  The region is delimited by the lines `.. START_<tag>` and `.. END_<tag>`.
  Everything between the markers is replaced by `contents`, surrounded by
  blank lines; the markers themselves are kept.

  Args:
    text: The full document text.
    tag: Marker suffix, e.g. 'FORMAT_STYLE_OPTIONS'.
    contents: New text to place between the markers.

  Returns:
    The document with the marked region replaced.

  Raises:
    ValueError: If the markers for `tag` are not found in `text`.
  """
  replacement = '\n.. START_%s\n\n%s\n\n.. END_%s\n' % (tag, contents, tag)
  # Escape the tag so that regex metacharacters in it are matched literally.
  escaped_tag = re.escape(tag)
  pattern = r'\n\.\. START_%s\n.*\n\.\. END_%s\n' % (escaped_tag, escaped_tag)
  # A callable replacement inserts the text verbatim: no backslash escapes are
  # interpreted in `contents`, and - unlike %-formatting the whole document -
  # literal '%' characters elsewhere in `text` are left alone.
  result, count = re.subn(pattern, lambda _match: replacement, text,
                          flags=re.S)
  if count == 0:
    raise ValueError('markers for %s not found in the document' % tag)
  return result
def register_plural(singular: str, plural: str):
  """Record `plural` as a generated plural and ask the user to review it.

  A plural that is already in the module-level `plurals` set is returned
  without any output. Otherwise it is added to the set, appended to
  PLURALS_FILE, and a review request naming the calling line of this script is
  printed to stderr. The first new plural of a run also prints a short usage
  hint to stdout.

  Args:
    singular: The word the plural was derived from (used in the message only).
    plural: The generated plural form.

  Returns:
    `plural`, unchanged.
  """
  if plural in plurals:
    return plural

  # The hint is emitted only once per run; the function attribute is the flag.
  if not hasattr(register_plural, "generated_new_plural"):
    print('Plural generation: you can use '
    f'`git checkout -- {os.path.relpath(PLURALS_FILE)}` '
    'to reemit warnings or `git add` to include new plurals\n')
  register_plural.generated_new_plural = True

  plurals.add(plural)
  with open(PLURALS_FILE, 'a') as plurals_file:
    plurals_file.write(plural + '\n')

  # Report the line of the caller so the user can find the rule that produced
  # this plural.
  frame = inspect.currentframe()
  caller = frame.f_back if frame else None
  lineno = f':{caller.f_lineno}' if caller else ''
  print(f'{__file__}{lineno} check if plural of {singular} is {plural}', file=sys.stderr)
  return plural
def pluralize(word: str):
  """Return a heuristic English plural of `word`.

  The ending is chosen from the lower-cased word, but the result keeps the
  original casing of the stem. Every result is passed through
  `register_plural`, which returns it unchanged.

  Rules, in order:
    * consonant + 'y'            -> drop 'y', add 'ies'
    * 's', 'sh', 'ch', 'x', 'z'  -> add 'es'
    * 'fe'                       -> drop 'fe', add 'ves'
    * 'f' (but not 'ff')         -> drop 'f', add 'ves'
    * anything else              -> add 's'

  Args:
    word: The singular word.

  Returns:
    The plural form.
  """
  lword = word.lower()
  if len(lword) >= 2 and lword[-1] == 'y' and lword[-2] not in 'aeiou':
    return register_plural(word, word[:-1] + 'ies')
  elif lword.endswith(('s', 'sh', 'ch', 'x', 'z')):
    # Sibilant endings keep the whole word: "Box" -> "Boxes". Dropping the
    # last letter here would produce "Boes".
    return register_plural(word, word + 'es')
  elif lword.endswith('fe'):
    return register_plural(word, word[:-2] + 'ves')
  elif lword.endswith('f') and not lword.endswith('ff'):
    return register_plural(word, word[:-1] + 'ves')
  else:
    return register_plural(word, word + 's')
def to_yaml_type(typestr: str):
  """Translate a C++ type spelling into the name used in the documentation.

  The four scalar types map to fixed names, `std::vector<T>` becomes
  'List of ' followed by the pluralized name of `T`, and any other type is
  returned unchanged.

  Args:
    typestr: The C++ type as written in the header.

  Returns:
    The documentation name of the type.
  """
  scalar_names = {
      'bool': 'Boolean',
      'int': 'Integer',
      'unsigned': 'Unsigned',
      'std::string': 'String',
  }
  if typestr in scalar_names:
    return scalar_names[typestr]

  element_type, hits = re.subn(r'^std::vector<(.*)>$', r'\1', typestr)
  if hits != 1:
    # Not a vector: enums and structs are documented under their own name.
    return typestr
  return 'List of ' + pluralize(to_yaml_type(element_type))
def doxygen2rst(text):
  r"""Convert the Doxygen markup used in the headers to reStructuredText.

  Three rewrites are applied in order:
    1. `<tt>x</tt>` becomes an inline literal.
    2. `\c word` becomes an inline literal.
    3. Any remaining `\command ` (backslash, word, space) is removed.

  Args:
    text: Comment text containing Doxygen markup.

  Returns:
    The converted text.
  """
  rewrites = (
      (r'<tt>\s*(.*?)\s*<\/tt>', r'``\1``'),
      (r'\\c ([^ ,;\.]+)', r'``\1``'),
      (r'\\\w+ ', ''),
  )
  for pattern, replacement in rewrites:
    text = re.sub(pattern, replacement, text)
  return text
def indent(text, columns, indent_first_line=True):
  """Indent `text` by `columns` spaces.

  Every line that follows a newline and is not empty gets the indentation;
  empty lines stay empty. The first line is indented only when
  `indent_first_line` is true and the text does not begin with a newline.

  Args:
    text: The text to indent.
    columns: Number of spaces to insert.
    indent_first_line: Whether the first line is indented as well.

  Returns:
    The indented text.
  """
  padding = ' ' * columns
  indented = re.sub(r'\n([^\n])', '\n' + padding + '\\1', text, flags=re.S)
  pad_first = indent_first_line and not indented.startswith('\n')
  return padding + indented if pad_first else indented
class Option:
  """One documented style option, i.e. a field of the parsed struct."""

  def __init__(self, name, opt_type, comment, version):
    """Store the option's description.

    Args:
      name: Field name of the option.
      opt_type: C++ type of the field as written in the header.
      comment: Documentation text; surrounding whitespace is removed.
      version: Version string shown in the version badge, or a false value
        when no badge should be emitted.
    """
    self.name = name
    self.type = opt_type
    self.comment = comment.strip()
    self.version = version
    # Filled in later when the type turns out to be an enum or nested struct.
    self.enum = None
    self.nested_struct = None

  def __str__(self):
    """Render the option as a reStructuredText entry."""
    pieces = [".. _%s:\n\n**%s** (``%s``) "
              % (self.name, self.name, to_yaml_type(self.type))]
    if self.version:
      pieces.append(':versionbadge:`clang-format %s` ' % self.version)
    pieces.append(':ref:`¶ <%s>`\n%s'
                  % (self.name, doxygen2rst(indent(self.comment, 2))))
    # An enum without documented values gets no "Possible values" section.
    if self.enum and self.enum.values:
      pieces.append(indent('\n\nPossible values:\n\n%s\n' % self.enum, 2))
    if self.nested_struct:
      pieces.append(
          indent('\n\nNested configuration flags:\n\n%s\n' % self.nested_struct,
                 2))
    return ''.join(pieces)
class NestedStruct:
  """A struct declared inside the parsed struct, with its documented fields."""

  def __init__(self, name, comment):
    """Create an empty nested struct.

    Args:
      name: Name of the struct.
      comment: Documentation text; surrounding whitespace is removed.
    """
    self.name = name
    self.comment = comment.strip()
    # Field entries, appended by the parser in declaration order.
    self.values = []

  def __str__(self):
    """Return the comment followed by one rendered entry per line."""
    rendered_fields = [str(value) for value in self.values]
    return self.comment + '\n' + '\n'.join(rendered_fields)
class NestedField:
  """A documented field of a nested struct."""

  def __init__(self, name, comment):
    """Store the field's label and documentation.

    Args:
      name: Text shown as the field's label.
      comment: Documentation text; surrounding whitespace is removed.
    """
    self.name = name
    self.comment = comment.strip()

  def __str__(self):
    """Render the field as a bullet item."""
    # The first comment line continues the bullet line, so only the following
    # lines are indented.
    description = doxygen2rst(
        indent(self.comment, 2, indent_first_line=False))
    return '\n* ``%s`` %s' % (self.name, description)
class Enum:
  """An enum declared inside the parsed struct, with its documented values."""

  def __init__(self, name, comment):
    """Create an enum without values.

    Args:
      name: Name of the enum type.
      comment: Documentation text; surrounding whitespace is removed.
    """
    self.name = name
    self.comment = comment.strip()
    # Value entries, appended by the parser in declaration order.
    self.values = []

  def __str__(self):
    """Return the rendered values, one per line."""
    return '\n'.join(str(value) for value in self.values)
class NestedEnum:
  """A nested-struct field whose type is an enum."""

  def __init__(self, name, enumtype, comment, values):
    """Store the field's description.

    Args:
      name: Name of the field.
      enumtype: Name of the enum type of the field.
      comment: Documentation text (stored as given, not stripped).
      values: The entries describing the enum's possible values.
    """
    self.name = name
    self.type = enumtype
    self.comment = comment
    self.values = values

  def __str__(self):
    """Render the field as a bullet item followed by its possible values."""
    pieces = ['\n* ``%s %s``\n%s' % (to_yaml_type(self.type), self.name,
                                     doxygen2rst(indent(self.comment, 2)))]
    pieces.append(indent('\nPossible values:\n\n', 2))
    pieces.append(indent('\n'.join(str(value) for value in self.values), 2))
    return ''.join(pieces)
class EnumValue:
  """A single documented enumerator."""

  def __init__(self, name, comment, config):
    """Store the enumerator's description.

    Args:
      name: Enumerator name as spelled in the header.
      comment: Documentation text (stored as given, not stripped).
      config: Text from which the configuration-file spelling is derived.
    """
    self.name = name
    self.comment = comment
    self.config = config

  def __str__(self):
    """Render the enumerator as a bullet item."""
    # Everything up to and including the last underscore is dropped to get the
    # name used in configuration files.
    config_name = re.sub('.*_', '', self.config)
    description = doxygen2rst(indent(self.comment, 2))
    return '* ``%s`` (in configuration: ``%s``)\n%s' % (
        self.name, config_name, description)
class OptionsReader:
  r"""Line-oriented parser that extracts documented options from a header.

  The reader looks for `struct FormatStyle {` or `struct IncludeStyle {` and
  collects every field that is preceded by a `///` comment, together with the
  enums and nested structs declared inside that struct. Diagnostics about the
  comments (nested `\code`, missing version, bad code block indentation) are
  printed to stderr.
  """

  def __init__(self, header: TextIOWrapper):
    """Create a reader for `header`, an open text file that yields lines."""
    self.header = header
    # True while inside a `\code` ... `\endcode` block.
    self.in_code_block = False
    # Number of spaces between '/// ' and `\code` of the current block.
    self.code_indent = 0
    # 1-based number of the line that is currently being processed.
    self.lineno = 0
    # Line number of the most recent indentation diagnostic; used to group
    # diagnostics for consecutive lines under one warning header.
    self.last_err_lineno = -1

  def __file_path(self):
    """Return the header's path relative to the current directory."""
    return os.path.relpath(self.header.name)

  def __print_line(self, line: str):
    """Print `line` to stderr, prefixed with the current line number."""
    print(f'{self.lineno:>6} | {line}', file=sys.stderr)

  def __warning(self, msg: str, line: str):
    """Print a `path:line: warning: msg:` header and the offending line."""
    print(f'{self.__file_path()}:{self.lineno}: warning: {msg}:', file=sys.stderr)
    self.__print_line(line)

  def __clean_comment_line(self, line: str):
    r"""Convert one stripped `///` comment line to documentation text.

    `\code` / `\endcode` and `\warning` / `\endwarning` are translated to the
    corresponding reStructuredText directives (the closing commands produce an
    empty string). Any other line is returned without its first four
    characters and with a newline appended. Code block state is tracked on
    `self`, and warnings are printed for unbalanced commands and for code
    block lines that are not indented far enough.

    Args:
      line: The comment line, already stripped of surrounding whitespace.

    Returns:
      The text to append to the comment being collected.
    """
    # `\code`, optionally indented after '/// ' and optionally followed by a
    # language in braces, e.g. `\code{.java}`.
    match = re.match(r'^/// (?P<indent> +)?\\code(\{.(?P<lang>\w+)\})?$', line)
    if match:
      if self.in_code_block:
        self.__warning('`\\code` in another `\\code`', line)
      self.in_code_block = True
      indent_str = match.group('indent')
      if not indent_str:
        indent_str = ''
      self.code_indent = len(indent_str)
      lang = match.group('lang')
      if not lang:
        # No language given: the block is emitted as C++.
        lang = 'c++'
      return f'\n{indent_str}.. code-block:: {lang}\n\n'

    endcode_match = re.match(r'^/// +\\endcode$', line)
    if endcode_match:
      if not self.in_code_block:
        self.__warning('no correct `\\code` found before this `\\endcode`', line)
      self.in_code_block = False
      return ''

    # check code block indentation
    # A non-empty line inside a code block has to start with two spaces more
    # than the `\code` line itself.
    if (self.in_code_block and not line == '///' and not
        line.startswith('///  ' + ' ' * self.code_indent)):
      if self.last_err_lineno == self.lineno - 1:
        # The previous line was reported as well: only show this line.
        self.__print_line(line)
      else:
        self.__warning('code block should be indented', line)
      self.last_err_lineno = self.lineno

    match = re.match(r'^/// \\warning$', line)
    if match:
      return '\n.. warning:: \n\n'

    endwarning_match = re.match(r'^/// +\\endwarning$', line)
    if endwarning_match:
      return ''
    # Ordinary comment text: drop the first four characters ('/// ').
    return line[4:] + '\n'

  def read_options(self):
    """Parse the header and return the documented options.

    Returns:
      A list of `Option` objects in declaration order. Options whose type is
      an enum or a nested struct declared in the header have their `enum` or
      `nested_struct` attribute set.

    Raises:
      Exception: If a field comment is followed by an unexpected line, if the
        end of the struct is not reached, or if an option has a type that is
        neither in the built-in list nor declared in the header.
    """
    class State:
      # Parser states, numbered 0..7.
      BeforeStruct, Finished, InStruct, InNestedStruct, InNestedFieldComment, \
        InFieldComment, InEnum, InEnumMemberComment = range(8)
    state = State.BeforeStruct

    options = []
    # Enums and nested structs seen so far, keyed by type name.
    enums = {}
    nested_structs = {}
    # Text of the comment that is currently being collected.
    comment = ''
    enum = None
    nested_struct = None
    # Version taken from the most recent `\version` line of a field comment.
    version = None

    for line in self.header:
      self.lineno += 1
      # All matching below is done on the line without surrounding whitespace.
      line = line.strip()
      if state == State.BeforeStruct:
        # Skip everything until the struct of interest opens.
        if line in ('struct FormatStyle {', 'struct IncludeStyle {'):
          state = State.InStruct
      elif state == State.InStruct:
        if line.startswith('///'):
          state = State.InFieldComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          # End of the struct: stop reading.
          state = State.Finished
          break
      elif state == State.InFieldComment:
        # The comment is followed by more comment lines and then by an enum,
        # a nested struct or a field declaration.
        if line.startswith(r'/// \version'):
          match = re.match(r'/// \\version\s*(?P<version>[0-9.]+)*', line)
          if match:
            version = match.group('version')
        elif line.startswith('///'):
          comment += self.__clean_comment_line(line)
        elif line.startswith('enum'):
          state = State.InEnum
          # Keep only the enum's name (an underlying type may follow it).
          name = re.sub(r'enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{', '\\1', line)
          enum = Enum(name, comment)
        elif line.startswith('struct'):
          state = State.InNestedStruct
          name = re.sub(r'struct\s+(\w+)\s*\{', '\\1', line)
          nested_struct = NestedStruct(name, comment)
        elif line.endswith(';'):
          # A field declaration; one that is commented out with '// ' is
          # accepted as well.
          prefix = '// '
          if line.startswith(prefix):
            line = line[len(prefix):]
          state = State.InStruct
          field_type, field_name = re.match(r'([<>:\w(,\s)]+)\s+(\w+);',
                                            line).groups()

          if not version:
            self.__warning(f'missing version for {field_name}', line)
          option = Option(str(field_name), str(field_type), comment, version)
          options.append(option)
          # The version applies to this option only.
          version = None
        else:
          raise Exception('Invalid format, expected comment, field or enum\n' + line)
      elif state == State.InNestedStruct:
        if line.startswith('///'):
          state = State.InNestedFieldComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.InStruct
          nested_structs[nested_struct.name] = nested_struct
      elif state == State.InNestedFieldComment:
        if line.startswith('///'):
          comment += self.__clean_comment_line(line)
        else:
          # The first non-comment line is taken to be the field declaration.
          state = State.InNestedStruct
          field_type, field_name = re.match(r'([<>:\w(,\s)]+)\s+(\w+);', line).groups()
          if field_type in enums:
            # The field's type is an enum parsed earlier: list its values.
            nested_struct.values.append(NestedEnum(field_name,
                                                   field_type,
                                                   comment,
                                                   enums[field_type].values))
          else:
            nested_struct.values.append(NestedField(field_type + " " + field_name, comment))

      elif state == State.InEnum:
        if line.startswith('///'):
          state = State.InEnumMemberComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.InStruct
          enums[enum.name] = enum
        else:
          # Enum member without documentation. Must be documented where the enum
          # is used.
          pass
      elif state == State.InEnumMemberComment:
        if line.startswith('///'):
          comment += self.__clean_comment_line(line)
        else:
          # The first non-comment line is taken to be the enumerator.
          state = State.InEnum
          val = line.replace(',', '')
          # A trailing ' // text' comment supplies the text from which the
          # configuration name is derived; without one the enumerator itself
          # is used.
          pos = val.find(" // ")
          if pos != -1:
            config = val[pos + 4:]
            val = val[:pos]
          else:
            config = val
          enum.values.append(EnumValue(val, comment, config))
    if state != State.Finished:
      raise Exception('Not finished by the end of file')

    # Attach the enum or nested struct to every option whose type is not one
    # of the built-in types.
    for option in options:
      if option.type not in ['bool', 'unsigned', 'int', 'std::string',
                             'std::vector<std::string>',
                             'std::vector<IncludeCategory>',
                             'std::vector<RawStringFormat>']:
        if option.type in enums:
          option.enum = enums[option.type]
        elif option.type in nested_structs:
          option.nested_struct = nested_structs[option.type]
        else:
          raise Exception('Unknown type: %s' % option.type)
    return options
# Collect the options of both headers. The files are read as UTF-8 explicitly
# so the result does not depend on the locale's default encoding.
with open(FORMAT_STYLE_FILE, encoding='utf-8') as f:
  opts = OptionsReader(f).read_options()
with open(INCLUDE_STYLE_FILE, encoding='utf-8') as f:
  opts += OptionsReader(f).read_options()

# The documentation lists the options alphabetically by name.
opts = sorted(opts, key=lambda x: x.name)
options_text = '\n\n'.join(map(str, opts))

# The generated text contains non-ASCII characters and is written back as
# UTF-8 below, so the existing document has to be decoded as UTF-8 as well;
# otherwise a non-UTF-8 locale would corrupt the file on every run.
with open(DOC_FILE, encoding='utf-8') as f:
  contents = f.read()

contents = substitute(contents, 'FORMAT_STYLE_OPTIONS', options_text)

# Binary mode writes the encoded bytes as they are, without any newline
# translation.
with open(DOC_FILE, 'wb') as output:
  output.write(contents.encode('utf-8'))