1"""
2    sphinx.util.cfamily
3    ~~~~~~~~~~~~~~~~~~~
4
5    Utility functions common to the C and C++ domains.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11import re
12import warnings
13from copy import deepcopy
14from typing import Any, Callable, List, Match, Optional, Pattern, Tuple, Union
15
16from docutils import nodes
17from docutils.nodes import TextElement
18
19from sphinx.config import Config
20from sphinx.deprecation import RemovedInSphinx40Warning
21from sphinx.util import logging
22
# Module-level logger, obtained through the ``logging`` helper imported above.
logger = logging.getLogger(__name__)

# Type of the callback handed to ``_stringify``: it turns an arbitrary AST
# object into its string form.
StringifyTransform = Callable[[Any], str]


# A run of one or more whitespace characters.
_whitespace_re = re.compile(r'(?u)\s+')
# '@' followed by at least one identifier character: the name of an anonymous
# entity.  The first character after '@' is captured in group 1.
anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
# Ordinary identifiers (optionally prefixed by '~') or anonymous '@' names.
# NOTE: the comment inside the pattern refers to ``anon_identifier_re`` above.
identifier_re = re.compile(r'''(?x)
    (   # This 'extends' _anon_identifier_re with the ordinary identifiers,
        # make sure they are in sync.
        (~?\b[a-zA-Z_])  # ordinary identifiers
    |   (@[a-zA-Z0-9_])  # our extension for names of anonymous entities
    )
    [a-zA-Z0-9_]*\b
''')
# Decimal integer literal: a non-zero digit followed by any digits.
integer_literal_re = re.compile(r'[1-9][0-9]*')
# Octal integer literal: '0' followed by octal digits (also matches a lone '0').
octal_literal_re = re.compile(r'0[0-7]*')
# Hexadecimal integer literal: '0x'/'0X' followed by at least one hex digit.
hex_literal_re = re.compile(r'0[xX][0-9a-fA-F][0-9a-fA-F]*')
# Binary integer literal: '0b'/'0B' followed by at least one binary digit.
binary_literal_re = re.compile(r'0[bB][01][01]*')
# Suffix that may follow an integer literal (u, l, ll and combinations).
integers_literal_suffix_re = re.compile(r'''(?x)
    # unsigned and/or (long) long, in any order, but at least one of them
    (
        ([uU]    ([lL]  |  (ll)  |  (LL))?)
        |
        (([lL]  |  (ll)  |  (LL))    [uU]?)
    )\b
    # the ending word boundary is important for distinguishing
    # between suffixes and UDLs in C++
''')
# Floating point literal, decimal or hexadecimal, with an optional sign.
float_literal_re = re.compile(r'''(?x)
    [+-]?(
    # decimal
      ([0-9]+[eE][+-]?[0-9]+)
    | ([0-9]*\.[0-9]+([eE][+-]?[0-9]+)?)
    | ([0-9]+\.([eE][+-]?[0-9]+)?)
    # hex
    | (0[xX][0-9a-fA-F]+[pP][+-]?[0-9a-fA-F]+)
    | (0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9a-fA-F]+)?)
    | (0[xX][0-9a-fA-F]+\.([pP][+-]?[0-9a-fA-F]+)?)
    )
''')
# Suffix that may follow a floating point literal (f, F, l or L).
float_literal_suffix_re = re.compile(r'[fFlL]\b')
# For the suffix pattern just above: the ending word boundary is important for
# distinguishing between suffixes and UDLs in C++.
# Character literal: optional encoding prefix (u8, u, U, L), then a single
# quoted plain character or one escape sequence.
char_literal_re = re.compile(r'''(?x)
    ((?:u8)|u|U|L)?
    '(
      (?:[^\\'])
    | (\\(
        (?:['"?\\abfnrtv])
      | (?:[0-7]{1,3})
      | (?:x[0-9a-fA-F]{2})
      | (?:u[0-9a-fA-F]{4})
      | (?:U[0-9a-fA-F]{8})
      ))
    )'
''')
79
80
def verify_description_mode(mode: str) -> None:
    """Check that *mode* is one of the known description modes.

    :param mode: the mode name to validate.
    :raises Exception: if *mode* is not a recognised mode name.
    """
    known_modes = ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl')
    if mode in known_modes:
        return
    raise Exception("Description mode '%s' is invalid." % mode)
84
85
class NoOldIdError(Exception):
    """Used to avoid implementing unneeded id generation for old id schemes."""

    @property
    def description(self) -> str:
        """Deprecated accessor for the message; emits a warning, returns ``str(self)``."""
        notice = ('%s.description is deprecated. '
                  'Coerce the instance to a string instead.' % self.__class__.__name__)
        warnings.warn(notice, RemovedInSphinx40Warning, stacklevel=2)
        return str(self)
94
95
class ASTBaseBase:
    """Root class of the AST node hierarchy.

    Provides attribute-wise equality, deep cloning and string rendering
    built on top of the ``_stringify`` hook, which subclasses implement.
    Instances are unhashable.
    """

    def __eq__(self, other: Any) -> bool:
        """Return True when *other* has the same type and equal attributes.

        Only the attributes present on ``self`` are compared.
        """
        if type(self) is not type(other):
            return False
        try:
            differs = any(mine != getattr(other, attr)
                          for attr, mine in self.__dict__.items())
        except AttributeError:
            # an attribute was missing during the comparison
            return False
        return not differs

    __hash__ = None  # type: Callable[[], int]

    def clone(self) -> Any:
        """Return a deep copy of this node."""
        return deepcopy(self)

    def _stringify(self, transform: StringifyTransform) -> str:
        """Render the node, using *transform* for nested objects (abstract)."""
        raise NotImplementedError(repr(self))

    def __str__(self) -> str:
        # nested objects are rendered with plain str()
        return self._stringify(str)

    def get_display_string(self) -> str:
        """Render the node, asking nested objects for their display string."""
        def as_display(ast: Any) -> str:
            return ast.get_display_string()

        return self._stringify(as_display)

    def __repr__(self) -> str:
        return '<{}>'.format(self.__class__.__name__)
124
125
126################################################################################
127# Attributes
128################################################################################
129
class ASTAttribute(ASTBaseBase):
    """Base class of the attribute nodes that can render into a signature."""

    def describe_signature(self, signode: TextElement) -> None:
        """Add this attribute to *signode*; concrete subclasses override this."""
        raise NotImplementedError(repr(self))
133
134
class ASTCPPAttribute(ASTAttribute):
    """An attribute written in double square brackets, e.g. ``[[arg]]``."""

    def __init__(self, arg: str) -> None:
        # raw text found between the double brackets
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        # ``transform`` is not needed: the argument is kept as plain text
        return ''.join(("[[", self.arg, "]]"))

    def describe_signature(self, signode: TextElement) -> None:
        """Append the textual form of the attribute to *signode*."""
        rendered = str(self)
        text_node = nodes.Text(rendered, rendered)
        signode.append(text_node)
145
146
class ASTGnuAttribute(ASTBaseBase):
    """One entry of a GNU attribute list: a name plus an optional argument list."""

    def __init__(self, name: str, args: Optional["ASTBaseParenExprList"]) -> None:
        self.name = name
        self.args = args

    def _stringify(self, transform: StringifyTransform) -> str:
        # the argument list is rendered only when it is present (truthy)
        rendered_args = transform(self.args) if self.args else ''
        return ''.join((self.name, rendered_args))
157
158
class ASTGnuAttributeList(ASTAttribute):
    """A GNU-style attribute list, rendered as ``__attribute__((a, b, ...))``."""

    def __init__(self, attrs: List[ASTGnuAttribute]) -> None:
        self.attrs = attrs

    def _stringify(self, transform: StringifyTransform) -> str:
        # render every entry in order, separated by ', '
        rendered = [transform(entry) for entry in self.attrs]
        return ''.join(('__attribute__((', ', '.join(rendered), '))'))

    def describe_signature(self, signode: TextElement) -> None:
        """Append the textual form of the whole list to *signode*."""
        rendered = str(self)
        text_node = nodes.Text(rendered, rendered)
        signode.append(text_node)
177
178
class ASTIdAttribute(ASTAttribute):
    """For simple attributes defined by the user."""

    def __init__(self, id: str) -> None:
        # the attribute is nothing more than this identifier
        self.id = id

    def _stringify(self, transform: StringifyTransform) -> str:
        # ``transform`` is not needed: the identifier is plain text
        return self.id

    def describe_signature(self, signode: TextElement) -> None:
        """Append the identifier as a text node to *signode*."""
        ident = self.id
        signode.append(nodes.Text(ident, ident))
190
191
class ASTParenAttribute(ASTAttribute):
    """For paren attributes defined by the user."""

    def __init__(self, id: str, arg: str) -> None:
        self.id = id    # attribute name
        self.arg = arg  # raw text between the parentheses

    def _stringify(self, transform: StringifyTransform) -> str:
        # ``transform`` is not needed: both parts are plain text
        return ''.join((self.id, '(', self.arg, ')'))

    def describe_signature(self, signode: TextElement) -> None:
        """Append the textual form ``id(arg)`` to *signode*."""
        rendered = str(self)
        text_node = nodes.Text(rendered, rendered)
        signode.append(text_node)
205
206
207################################################################################
208
class ASTBaseParenExprList(ASTBaseBase):
    """Marker base class for parenthesised expression lists; adds no behaviour."""
211
212
213################################################################################
214
class UnsupportedMultiCharacterCharLiteral(Exception):
    """Exception type for multi-character character literals."""

    @property
    def decoded(self) -> str:
        """Deprecated accessor for the message; emits a warning, returns ``str(self)``."""
        notice = ('%s.decoded is deprecated. '
                  'Coerce the instance to a string instead.' % self.__class__.__name__)
        warnings.warn(notice, RemovedInSphinx40Warning, stacklevel=2)
        return str(self)
222
223
class DefinitionError(Exception):
    """Exception type used to report a definition that could not be parsed."""

    @property
    def description(self) -> str:
        """Deprecated accessor for the message; emits a warning, returns ``str(self)``."""
        notice = ('%s.description is deprecated. '
                  'Coerce the instance to a string instead.' % self.__class__.__name__)
        warnings.warn(notice, RemovedInSphinx40Warning, stacklevel=2)
        return str(self)
231
232
class BaseParser:
    """Cursor-based scanner shared by the declaration parsers.

    The parser holds the stripped ``definition`` string and a cursor ``pos``
    into it.  The ``skip_*`` and ``match`` helpers advance the cursor when
    they succeed and leave it untouched otherwise.

    Subclasses have to provide ``language``, ``id_attributes``,
    ``paren_attributes`` and ``_parse_paren_expression_list``; the versions
    here raise ``NotImplementedError``.
    """

    def __init__(self, definition: str, *,
                 location: Union[nodes.Node, Tuple[str, int]],
                 config: "Config") -> None:
        """
        :param definition: the text to parse; surrounding whitespace is stripped.
        :param location: passed on to the logger when warnings are emitted.
        :param config: stored as ``self.config``.
        """
        self.definition = definition.strip()
        self.location = location  # for warnings
        self.config = config

        self.pos = 0
        self.end = len(self.definition)
        self.last_match = None  # type: Match
        self._previous_state = (0, None)  # type: Tuple[int, Match]
        self.otherErrors = []  # type: List[DefinitionError]

        # in our tests the following is set to False to capture bad parsing
        self.allowFallbackExpressionParsing = True

    def _make_multi_error(self, errors: List[Any], header: str) -> DefinitionError:
        """Fold several errors into a single ``DefinitionError``.

        :param errors: ``(exception, description)`` pairs.
        :param header: text put in front of the messages; may be empty.
        :returns: the combined error (not raised here).
        """
        if len(errors) == 1:
            # a single error is reported as-is, without its description
            if len(header) > 0:
                return DefinitionError(header + '\n' + str(errors[0][0]))
            else:
                return DefinitionError(str(errors[0][0]))
        result = [header, '\n']
        for e in errors:
            if len(e[1]) > 0:
                # described errors: description line, then the indented message
                indent = '  '
                result.append(e[1])
                result.append(':\n')
                for line in str(e[0]).split('\n'):
                    if len(line) == 0:
                        continue
                    result.append(indent)
                    result.append(line)
                    result.append('\n')
            else:
                result.append(str(e[0]))
        return DefinitionError(''.join(result))

    @property
    def language(self) -> str:
        """Language name used in error messages; provided by subclasses."""
        raise NotImplementedError

    def status(self, msg: str) -> None:
        """Print *msg*, the definition and a marker under the current position."""
        # for debugging
        indicator = '-' * self.pos + '^'
        print("%s\n%s\n%s" % (msg, self.definition, indicator))

    def fail(self, msg: str) -> None:
        """Raise a ``DefinitionError`` for the current position.

        The collected ``otherErrors`` are reported along with the main error
        and the collection is reset.

        :raises DefinitionError: always.
        """
        errors = []
        indicator = '-' * self.pos + '^'
        exMain = DefinitionError(
            'Invalid %s declaration: %s [error at %d]\n  %s\n  %s' %
            (self.language, msg, self.pos, self.definition, indicator))
        errors.append((exMain, "Main error"))
        for err in self.otherErrors:
            errors.append((err, "Potential other error"))
        self.otherErrors = []
        raise self._make_multi_error(errors, '')

    def warn(self, msg: str) -> None:
        """Log *msg* as a warning attached to ``self.location``."""
        logger.warning(msg, location=self.location)

    def match(self, regex: Pattern) -> bool:
        """Try to match *regex* at the current position.

        On success the previous ``(pos, last_match)`` pair is remembered in
        ``_previous_state``, the cursor moves to the end of the match and the
        match object is stored in ``last_match``.
        """
        match = regex.match(self.definition, self.pos)
        if match is not None:
            self._previous_state = (self.pos, self.last_match)
            self.pos = match.end()
            self.last_match = match
            return True
        return False

    def skip_string(self, string: str) -> bool:
        """Skip *string* if the remaining text starts with it."""
        strlen = len(string)
        if self.definition[self.pos:self.pos + strlen] == string:
            self.pos += strlen
            return True
        return False

    def skip_word(self, word: str) -> bool:
        """Skip *word* if it occurs here delimited by word boundaries."""
        return self.match(re.compile(r'\b%s\b' % re.escape(word)))

    def skip_ws(self) -> bool:
        """Skip a run of whitespace; return whether anything was skipped."""
        return self.match(_whitespace_re)

    def skip_word_and_ws(self, word: str) -> bool:
        """Like ``skip_word``, additionally skipping whitespace after the word."""
        if self.skip_word(word):
            self.skip_ws()
            return True
        return False

    def skip_string_and_ws(self, string: str) -> bool:
        """Like ``skip_string``, additionally skipping whitespace after the string."""
        if self.skip_string(string):
            self.skip_ws()
            return True
        return False

    @property
    def eof(self) -> bool:
        """Whether the cursor is at (or past) the end of the definition."""
        return self.pos >= self.end

    @property
    def current_char(self) -> str:
        """The character under the cursor, or the string ``'EOF'`` at the end."""
        try:
            return self.definition[self.pos]
        except IndexError:
            return 'EOF'

    @property
    def matched_text(self) -> Optional[str]:
        """Text of the last successful ``match``, or None if there was none."""
        if self.last_match is not None:
            return self.last_match.group()
        else:
            return None

    def read_rest(self) -> str:
        """Return the remaining text and move the cursor to the end."""
        rv = self.definition[self.pos:]
        self.pos = self.end
        return rv

    def assert_end(self, *, allowSemicolon: bool = False) -> None:
        """Fail unless only whitespace (and optionally a single ';') remains.

        :param allowSemicolon: also accept exactly ``;`` as the remaining text.
        :raises DefinitionError: if other text remains.
        """
        self.skip_ws()
        if allowSemicolon:
            if not self.eof and self.definition[self.pos:] != ';':
                self.fail('Expected end of definition or ;.')
        else:
            if not self.eof:
                self.fail('Expected end of definition.')

    ################################################################################

    @property
    def id_attributes(self):
        """Names of the user-defined simple attributes; provided by subclasses."""
        raise NotImplementedError

    @property
    def paren_attributes(self):
        """Names of the user-defined paren attributes; provided by subclasses."""
        raise NotImplementedError

    def _parse_balanced_token_seq(self, end: List[str]) -> str:
        """Consume text with balanced brackets up to one of the *end* characters.

        The terminating character is not consumed.

        :param end: characters that stop the scan when no bracket is open.
        :returns: the consumed text.
        :raises DefinitionError: on an unmatched closing bracket or when the
            end of the definition is reached first.
        """
        # TODO: add handling of string literals and similar
        brackets = {'(': ')', '[': ']', '{': '}'}
        startPos = self.pos
        symbols = []  # type: List[str]  # closing brackets still expected
        while not self.eof:
            if len(symbols) == 0 and self.current_char in end:
                break
            if self.current_char in brackets:
                symbols.append(brackets[self.current_char])
            elif len(symbols) > 0 and self.current_char == symbols[-1]:
                symbols.pop()
            elif self.current_char in ")]}":
                self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char)
            self.pos += 1
        if self.eof:
            self.fail("Could not find end of balanced-token-seq starting at %d."
                      % startPos)
        return self.definition[startPos:self.pos]

    def _parse_attribute(self) -> Optional[ASTAttribute]:
        """Parse one attribute at the current position, if there is one.

        Tried in order: the ``[[...]]`` form, the ``__attribute__((...))``
        form, the user-defined simple attributes and the user-defined paren
        attributes.

        :returns: the attribute node, or None when no attribute starts here.
        :raises DefinitionError: when an attribute starts here but is malformed.
        """
        self.skip_ws()
        # try C++11 style
        startPos = self.pos
        if self.skip_string_and_ws('['):
            if not self.skip_string('['):
                # a single '[' is not an attribute: rewind
                self.pos = startPos
            else:
                # TODO: actually implement the correct grammar
                arg = self._parse_balanced_token_seq(end=[']'])
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute.")
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute after [[...]")
                return ASTCPPAttribute(arg)

        # try GNU style
        if self.skip_word_and_ws('__attribute__'):
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__'.")
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__('.")
            attrs = []
            while True:
                if self.match(identifier_re):
                    name = self.matched_text
                    exprs = self._parse_paren_expression_list()
                    attrs.append(ASTGnuAttribute(name, exprs))
                if self.skip_string_and_ws(','):
                    continue
                elif self.skip_string_and_ws(')'):
                    break
                else:
                    self.fail("Expected identifier, ')', or ',' in __attribute__.")
            if not self.skip_string_and_ws(')'):
                self.fail("Expected ')' after '__attribute__((...)'")
            return ASTGnuAttributeList(attrs)

        # try the simple id attributes defined by the user
        for attr_id in self.id_attributes:
            if self.skip_word_and_ws(attr_id):
                return ASTIdAttribute(attr_id)

        # try the paren attributes defined by the user
        for attr_id in self.paren_attributes:
            # Match the name as a whole word, like the id attributes above.
            # A plain string comparison would also accept a mere prefix of a
            # longer identifier (e.g. 'attr' in 'attr_t') and then fail on the
            # missing '(' although no attribute is present at all.
            if not self.skip_word_and_ws(attr_id):
                continue
            if not self.skip_string('('):
                self.fail("Expected '(' after user-defined paren-attribute.")
            arg = self._parse_balanced_token_seq(end=[')'])
            if not self.skip_string(')'):
                self.fail("Expected ')' to end user-defined paren-attribute.")
            return ASTParenAttribute(attr_id, arg)

        return None

    def _parse_paren_expression_list(self) -> ASTBaseParenExprList:
        """Parse a parenthesised expression list; provided by subclasses."""
        raise NotImplementedError
450