"""
    sphinx.util.cfamily
    ~~~~~~~~~~~~~~~~~~~

    Utility functions common to the C and C++ domains.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
import warnings
from copy import deepcopy
from typing import Any, Callable, List, Match, Optional, Pattern, Tuple, Union

from docutils import nodes
from docutils.nodes import TextElement

from sphinx.config import Config
from sphinx.deprecation import RemovedInSphinx40Warning
from sphinx.util import logging

logger = logging.getLogger(__name__)

# Callable used by ``_stringify`` implementations to render nested AST nodes.
StringifyTransform = Callable[[Any], str]


_whitespace_re = re.compile(r'(?u)\s+')
anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
identifier_re = re.compile(r'''(?x)
    (   # This 'extends' _anon_identifier_re with the ordinary identifiers,
        # make sure they are in sync.
        (~?\b[a-zA-Z_])  # ordinary identifiers
    |   (@[a-zA-Z0-9_])  # our extension for names of anonymous entities
    )
    [a-zA-Z0-9_]*\b
''')
integer_literal_re = re.compile(r'[1-9][0-9]*')
octal_literal_re = re.compile(r'0[0-7]*')
hex_literal_re = re.compile(r'0[xX][0-9a-fA-F][0-9a-fA-F]*')
binary_literal_re = re.compile(r'0[bB][01][01]*')
integers_literal_suffix_re = re.compile(r'''(?x)
    # unsigned and/or (long) long, in any order, but at least one of them
    (
        ([uU]    ([lL]  |  (ll)  |  (LL))?)
        |
        (([lL]  |  (ll)  |  (LL))    [uU]?)
    )\b
    # the ending word boundary is important for distinguishing
    # between suffixes and UDLs in C++
''')
float_literal_re = re.compile(r'''(?x)
    [+-]?(
    # decimal
      ([0-9]+[eE][+-]?[0-9]+)
    | ([0-9]*\.[0-9]+([eE][+-]?[0-9]+)?)
    | ([0-9]+\.([eE][+-]?[0-9]+)?)
    # hex
    | (0[xX][0-9a-fA-F]+[pP][+-]?[0-9a-fA-F]+)
    | (0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9a-fA-F]+)?)
    | (0[xX][0-9a-fA-F]+\.([pP][+-]?[0-9a-fA-F]+)?)
    )
''')
float_literal_suffix_re = re.compile(r'[fFlL]\b')
# the ending word boundary is important for distinguishing between suffixes and UDLs in C++
char_literal_re = re.compile(r'''(?x)
    ((?:u8)|u|U|L)?
    '(
      (?:[^\\'])
    | (\\(
        (?:['"?\\abfnrtv])
      | (?:[0-7]{1,3})
      | (?:x[0-9a-fA-F]{2})
      | (?:u[0-9a-fA-F]{4})
      | (?:U[0-9a-fA-F]{8})
      ))
    )'
''')


def verify_description_mode(mode: str) -> None:
    """Raise an :exc:`Exception` unless *mode* is a known description mode."""
    if mode not in ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl'):
        raise Exception("Description mode '%s' is invalid." % mode)


class NoOldIdError(Exception):
    # Used to avoid implementing unneeded id generation for old id schemes.
    @property
    def description(self) -> str:
        """Deprecated alias for ``str(self)``; emits a deprecation warning."""
        warnings.warn('%s.description is deprecated. '
                      'Coerce the instance to a string instead.' % self.__class__.__name__,
                      RemovedInSphinx40Warning, stacklevel=2)
        return str(self)


class ASTBaseBase:
    """Common base of the AST node classes defined in this module.

    Provides attribute-wise equality, deep cloning and string rendering on
    top of a ``_stringify`` hook that subclasses must implement.
    """

    def __eq__(self, other: Any) -> bool:
        """Return True if *other* has the exact same type and equal attributes.

        Only the attributes present in ``self.__dict__`` are compared; an
        attribute missing on *other* makes the objects unequal.
        """
        if type(self) is not type(other):
            return False
        try:
            for key, value in self.__dict__.items():
                if value != getattr(other, key):
                    return False
        except AttributeError:
            return False
        return True

    # Setting __hash__ to None makes instances unhashable.
    __hash__ = None  # type: Callable[[], int]

    def clone(self) -> Any:
        """Return a deep copy of this node."""
        return deepcopy(self)

    def _stringify(self, transform: StringifyTransform) -> str:
        """Render this node, using *transform* for nested nodes.

        Must be overridden by subclasses.
        """
        raise NotImplementedError(repr(self))

    def __str__(self) -> str:
        return self._stringify(lambda ast: str(ast))

    def get_display_string(self) -> str:
        """Render this node, calling ``get_display_string()`` on nested nodes."""
        return self._stringify(lambda ast: ast.get_display_string())

    def __repr__(self) -> str:
        return '<%s>' % self.__class__.__name__


################################################################################
# Attributes
################################################################################

class ASTAttribute(ASTBaseBase):
    """Base class for attribute nodes that can be added to a signature node."""

    def describe_signature(self, signode: TextElement) -> None:
        """Append a rendering of this attribute to *signode*.

        Must be overridden by subclasses.
        """
        raise NotImplementedError(repr(self))


class ASTCPPAttribute(ASTAttribute):
    """A C++11 style attribute, rendered as ``[[arg]]``."""

    def __init__(self, arg: str) -> None:
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return "[[" + self.arg + "]]"

    def describe_signature(self, signode: TextElement) -> None:
        txt = str(self)
        signode.append(nodes.Text(txt, txt))


class ASTGnuAttribute(ASTBaseBase):
    """A single entry of a GNU ``__attribute__((...))`` list."""

    def __init__(self, name: str, args: Optional["ASTBaseParenExprList"]) -> None:
        self.name = name
        self.args = args

    def _stringify(self, transform: StringifyTransform) -> str:
        # The argument list is rendered only when it is truthy.
        res = [self.name]
        if self.args:
            res.append(transform(self.args))
        return ''.join(res)


class ASTGnuAttributeList(ASTAttribute):
    """A GNU style attribute list, rendered as ``__attribute__((a, b, ...))``."""

    def __init__(self, attrs: List[ASTGnuAttribute]) -> None:
        self.attrs = attrs

    def _stringify(self, transform: StringifyTransform) -> str:
        inner = ', '.join(transform(attr) for attr in self.attrs)
        return '__attribute__((' + inner + '))'

    def describe_signature(self, signode: TextElement) -> None:
        txt = str(self)
        signode.append(nodes.Text(txt, txt))


class ASTIdAttribute(ASTAttribute):
    """For simple attributes defined by the user."""

    def __init__(self, id: str) -> None:
        self.id = id

    def _stringify(self, transform: StringifyTransform) -> str:
        return self.id

    def describe_signature(self, signode: TextElement) -> None:
        signode.append(nodes.Text(self.id, self.id))


class ASTParenAttribute(ASTAttribute):
    """For paren attributes defined by the user."""

    def __init__(self, id: str, arg: str) -> None:
        self.id = id
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return self.id + '(' + self.arg + ')'

    def describe_signature(self, signode: TextElement) -> None:
        txt = str(self)
        signode.append(nodes.Text(txt, txt))


################################################################################

class ASTBaseParenExprList(ASTBaseBase):
    """Base class for parenthesized expression lists."""
    pass


################################################################################

class UnsupportedMultiCharacterCharLiteral(Exception):
    @property
    def decoded(self) -> str:
        """Deprecated alias for ``str(self)``; emits a deprecation warning."""
        warnings.warn('%s.decoded is deprecated. '
                      'Coerce the instance to a string instead.' % self.__class__.__name__,
                      RemovedInSphinx40Warning, stacklevel=2)
        return str(self)


class DefinitionError(Exception):
    @property
    def description(self) -> str:
        """Deprecated alias for ``str(self)``; emits a deprecation warning."""
        warnings.warn('%s.description is deprecated. '
                      'Coerce the instance to a string instead.' % self.__class__.__name__,
                      RemovedInSphinx40Warning, stacklevel=2)
        return str(self)


class BaseParser:
    """Cursor-based parser over a definition string.

    Keeps the current position in ``self.pos`` and offers small matching and
    skipping primitives.  Subclasses must provide ``language``,
    ``id_attributes``, ``paren_attributes`` and
    ``_parse_paren_expression_list``.
    """

    def __init__(self, definition: str, *,
                 location: Union[nodes.Node, Tuple[str, int]],
                 config: "Config") -> None:
        """Set up a parser positioned at the start of the stripped *definition*.

        :param definition: the text to parse; surrounding whitespace is removed
        :param location: passed on to the logger when emitting warnings
        :param config: stored as ``self.config``
        """
        self.definition = definition.strip()
        self.location = location  # for warnings
        self.config = config

        self.pos = 0
        self.end = len(self.definition)
        self.last_match = None  # type: Match
        self._previous_state = (0, None)  # type: Tuple[int, Match]
        self.otherErrors = []  # type: List[DefinitionError]

        # in our tests the following is set to False to capture bad parsing
        self.allowFallbackExpressionParsing = True

    def _make_multi_error(self, errors: List[Any], header: str) -> DefinitionError:
        """Combine several errors into one :exc:`DefinitionError`.

        :param errors: sequence of ``(error, description)`` pairs; the error is
                       rendered with ``str()`` and the description is used as
                       a label for it
        :param header: text placed before the rendered errors
        :returns: the combined error (it is returned, not raised)
        """
        if len(errors) == 1:
            # A single error is reported as-is, without its description.
            if header:
                return DefinitionError(header + '\n' + str(errors[0][0]))
            else:
                return DefinitionError(str(errors[0][0]))
        indent = ' '
        result = [header, '\n']
        for e in errors:
            if e[1]:
                # Labelled error: description, then the indented non-empty
                # lines of the message.
                result.append(e[1])
                result.append(':\n')
                for line in str(e[0]).split('\n'):
                    if not line:
                        continue
                    result.append(indent)
                    result.append(line)
                    result.append('\n')
            else:
                result.append(str(e[0]))
        return DefinitionError(''.join(result))

    @property
    def language(self) -> str:
        """Language name used in error messages; must be overridden."""
        raise NotImplementedError

    def status(self, msg: str) -> None:
        """Print *msg*, the definition and a marker at the current position."""
        # for debugging
        indicator = '-' * self.pos + '^'
        print("%s\n%s\n%s" % (msg, self.definition, indicator))

    def fail(self, msg: str) -> None:
        """Raise a :exc:`DefinitionError` describing a failure at ``self.pos``.

        The errors collected in ``self.otherErrors`` are included in the
        raised error and the list is reset.
        """
        errors = []
        indicator = '-' * self.pos + '^'
        exMain = DefinitionError(
            'Invalid %s declaration: %s [error at %d]\n %s\n %s' %
            (self.language, msg, self.pos, self.definition, indicator))
        errors.append((exMain, "Main error"))
        for err in self.otherErrors:
            errors.append((err, "Potential other error"))
        self.otherErrors = []
        raise self._make_multi_error(errors, '')

    def warn(self, msg: str) -> None:
        """Log *msg* as a warning attributed to ``self.location``."""
        logger.warning(msg, location=self.location)

    def match(self, regex: Pattern) -> bool:
        """Try to match *regex* at the current position.

        On success the previous ``(pos, last_match)`` pair is saved in
        ``self._previous_state``, the position moves to the end of the match
        and the match object is stored in ``self.last_match``.

        :returns: True if the pattern matched, False otherwise
        """
        match = regex.match(self.definition, self.pos)
        if match is not None:
            self._previous_state = (self.pos, self.last_match)
            self.pos = match.end()
            self.last_match = match
            return True
        return False

    def skip_string(self, string: str) -> bool:
        """Advance past *string* if the definition continues with it."""
        strlen = len(string)
        if self.definition[self.pos:self.pos + strlen] == string:
            self.pos += strlen
            return True
        return False

    def skip_word(self, word: str) -> bool:
        """Advance past *word* if it occurs here delimited by word boundaries."""
        return self.match(re.compile(r'\b%s\b' % re.escape(word)))

    def skip_ws(self) -> bool:
        """Advance past whitespace; return True if any was skipped."""
        return self.match(_whitespace_re)

    def skip_word_and_ws(self, word: str) -> bool:
        """Like :meth:`skip_word`, additionally skipping trailing whitespace."""
        if self.skip_word(word):
            self.skip_ws()
            return True
        return False

    def skip_string_and_ws(self, string: str) -> bool:
        """Like :meth:`skip_string`, additionally skipping trailing whitespace."""
        if self.skip_string(string):
            self.skip_ws()
            return True
        return False

    @property
    def eof(self) -> bool:
        """True once the position has reached the end of the definition."""
        return self.pos >= self.end

    @property
    def current_char(self) -> str:
        """The character at the current position, or ``'EOF'`` past the end."""
        try:
            return self.definition[self.pos]
        except IndexError:
            return 'EOF'

    @property
    def matched_text(self) -> str:
        """Text of the last successful :meth:`match`, or None if there is none."""
        if self.last_match is not None:
            return self.last_match.group()
        else:
            return None

    def read_rest(self) -> str:
        """Return the unparsed remainder and move the position to the end."""
        rv = self.definition[self.pos:]
        self.pos = self.end
        return rv

    def assert_end(self, *, allowSemicolon: bool = False) -> None:
        """Fail unless only whitespace remains.

        :param allowSemicolon: if True, a remainder of exactly ``;`` is
                               accepted as well
        """
        self.skip_ws()
        if allowSemicolon:
            if not self.eof and self.definition[self.pos:] != ';':
                self.fail('Expected end of definition or ;.')
        else:
            if not self.eof:
                self.fail('Expected end of definition.')

    ################################################################################

    @property
    def id_attributes(self):
        """Names of the user-defined simple attributes; must be overridden."""
        raise NotImplementedError

    @property
    def paren_attributes(self):
        """Names of the user-defined paren attributes; must be overridden."""
        raise NotImplementedError

    def _parse_balanced_token_seq(self, end: List[str]) -> str:
        """Consume text up to an unnested character from *end*.

        Opening ``(``, ``[`` and ``{`` are tracked so that characters from
        *end* inside a nested bracket pair do not terminate the sequence.  The
        terminating character itself is not consumed.

        :returns: the consumed text
        :raises DefinitionError: on an unexpected closing bracket, or if the
                                 end of the definition is reached first
        """
        # TODO: add handling of string literals and similar
        brackets = {'(': ')', '[': ']', '{': '}'}
        startPos = self.pos
        symbols = []  # type: List[str]
        while not self.eof:
            if len(symbols) == 0 and self.current_char in end:
                break
            if self.current_char in brackets:
                # Remember which closing bracket is expected next.
                symbols.append(brackets[self.current_char])
            elif len(symbols) > 0 and self.current_char == symbols[-1]:
                symbols.pop()
            elif self.current_char in ")]}":
                self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char)
            self.pos += 1
        if self.eof:
            self.fail("Could not find end of balanced-token-seq starting at %d."
                      % startPos)
        return self.definition[startPos:self.pos]

    def _parse_attribute(self) -> Optional[ASTAttribute]:
        """Parse one attribute at the current position, if there is one.

        Tried in order: C++11 ``[[...]]``, GNU ``__attribute__((...))``, the
        names in ``id_attributes`` and the names in ``paren_attributes``.

        :returns: the parsed attribute node, or None if nothing matched
        :raises DefinitionError: if an attribute starts but is malformed
        """
        self.skip_ws()
        # try C++11 style
        startPos = self.pos
        if self.skip_string_and_ws('['):
            if not self.skip_string('['):
                # A single '[' is not an attribute; undo the skip.
                self.pos = startPos
            else:
                # TODO: actually implement the correct grammar
                arg = self._parse_balanced_token_seq(end=[']'])
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute.")
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute after [[...]")
                return ASTCPPAttribute(arg)

        # try GNU style
        if self.skip_word_and_ws('__attribute__'):
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__'.")
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__('.")
            attrs = []
            while True:
                if self.match(identifier_re):
                    name = self.matched_text
                    exprs = self._parse_paren_expression_list()
                    attrs.append(ASTGnuAttribute(name, exprs))
                if self.skip_string_and_ws(','):
                    continue
                elif self.skip_string_and_ws(')'):
                    break
                else:
                    self.fail("Expected identifier, ')', or ',' in __attribute__.")
            if not self.skip_string_and_ws(')'):
                self.fail("Expected ')' after '__attribute__((...)'")
            return ASTGnuAttributeList(attrs)

        # try the simple id attributes defined by the user
        for attr_id in self.id_attributes:
            if self.skip_word_and_ws(attr_id):
                return ASTIdAttribute(attr_id)

        # try the paren attributes defined by the user
        for attr_id in self.paren_attributes:
            if not self.skip_string_and_ws(attr_id):
                continue
            if not self.skip_string('('):
                self.fail("Expected '(' after user-defined paren-attribute.")
            arg = self._parse_balanced_token_seq(end=[')'])
            if not self.skip_string(')'):
                self.fail("Expected ')' to end user-defined paren-attribute.")
            return ASTParenAttribute(attr_id, arg)

        return None

    def _parse_paren_expression_list(self) -> ASTBaseParenExprList:
        """Parse a parenthesized expression list; must be overridden."""
        raise NotImplementedError