xref: /qemu/scripts/qapi/parser.py (revision 7653b1ea)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20from typing import (
21    TYPE_CHECKING,
22    Dict,
23    List,
24    Mapping,
25    Match,
26    Optional,
27    Set,
28    Union,
29)
30
31from .common import must_match
32from .error import QAPISemError, QAPISourceError
33from .source import QAPISourceInfo
34
35
36if TYPE_CHECKING:
37    # pylint: disable=cyclic-import
38    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
39    from .schema import QAPISchemaFeature, QAPISchemaMember
40
41
42# Return value alias for get_expr().
43_ExprValue = Union[List[object], Dict[str, object], str, bool]
44
45
class QAPIExpression(Dict[str, object]):
    """
    A top-level schema expression: a dict plus source metadata.

    :ivar info: The `QAPISourceInfo` given to the constructor.
    :ivar doc: The `QAPIDoc` given to the constructor, if any.
    """
    # pylint: disable=too-few-public-methods
    def __init__(
            self,
            data: Mapping[str, object],
            info: QAPISourceInfo,
            doc: Optional['QAPIDoc'] = None,
    ):
        # Metadata lives in plain attributes next to the dict contents.
        self.doc: Optional['QAPIDoc'] = doc
        self.info = info
        super().__init__(data)
55
56
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error is reported at the parser's current token: the source
    info is ``parser.info``, and the column is derived from the text
    between ``parser.line_pos`` and ``parser.pos``.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        # Compute the 1-based column of parser.pos within its line,
        # expanding tabs to tab stops every 8 columns.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Jump to the next tab stop (column 9, 17, 25, ...).
                # The previous '(col + 7) % 8 + 1' left col unchanged
                # for columns 1-8, so tabs never advanced the column.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
67
68
69class QAPISchemaParser:
70    """
71    Parse QAPI schema source.
72
73    Parse a JSON-esque schema file and process directives.  See
74    qapi-code-gen.rst section "Schema Syntax" for the exact syntax.
75    Grammatical validation is handled later by `expr.check_exprs()`.
76
77    :param fname: Source file name.
78    :param previously_included:
79        The absolute names of previously included source files,
80        if being invoked from another parser.
81    :param incl_info:
82       `QAPISourceInfo` belonging to the parent module.
83       ``None`` implies this is the root module.
84
85    :ivar exprs: Resulting parsed expressions.
86    :ivar docs: Resulting parsed documentation blocks.
87
88    :raise OSError: For problems reading the root schema document.
89    :raise QAPIError: For errors in the schema source.
90    """
91    def __init__(self,
92                 fname: str,
93                 previously_included: Optional[Set[str]] = None,
94                 incl_info: Optional[QAPISourceInfo] = None):
95        self._fname = fname
96        self._included = previously_included or set()
97        self._included.add(os.path.abspath(self._fname))
98        self.src = ''
99
100        # Lexer state (see `accept` for details):
101        self.info = QAPISourceInfo(self._fname, incl_info)
102        self.tok: Union[None, str] = None
103        self.pos = 0
104        self.cursor = 0
105        self.val: Optional[Union[bool, str]] = None
106        self.line_pos = 0
107
108        # Parser output:
109        self.exprs: List[QAPIExpression] = []
110        self.docs: List[QAPIDoc] = []
111
112        # Showtime!
113        self._parse()
114
115    def _parse(self) -> None:
116        """
117        Parse the QAPI schema document.
118
119        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
120        """
121        cur_doc = None
122
123        # May raise OSError; allow the caller to handle it.
124        with open(self._fname, 'r', encoding='utf-8') as fp:
125            self.src = fp.read()
126        if self.src == '' or self.src[-1] != '\n':
127            self.src += '\n'
128
129        # Prime the lexer:
130        self.accept()
131
132        # Parse until done:
133        while self.tok is not None:
134            info = self.info
135            if self.tok == '#':
136                self.reject_expr_doc(cur_doc)
137                cur_doc = self.get_doc()
138                self.docs.append(cur_doc)
139                continue
140
141            expr = self.get_expr()
142            if not isinstance(expr, dict):
143                raise QAPISemError(
144                    info, "top-level expression must be an object")
145
146            if 'include' in expr:
147                self.reject_expr_doc(cur_doc)
148                if len(expr) != 1:
149                    raise QAPISemError(info, "invalid 'include' directive")
150                include = expr['include']
151                if not isinstance(include, str):
152                    raise QAPISemError(info,
153                                       "value of 'include' must be a string")
154                incl_fname = os.path.join(os.path.dirname(self._fname),
155                                          include)
156                self._add_expr(OrderedDict({'include': incl_fname}), info)
157                exprs_include = self._include(include, info, incl_fname,
158                                              self._included)
159                if exprs_include:
160                    self.exprs.extend(exprs_include.exprs)
161                    self.docs.extend(exprs_include.docs)
162            elif "pragma" in expr:
163                self.reject_expr_doc(cur_doc)
164                if len(expr) != 1:
165                    raise QAPISemError(info, "invalid 'pragma' directive")
166                pragma = expr['pragma']
167                if not isinstance(pragma, dict):
168                    raise QAPISemError(
169                        info, "value of 'pragma' must be an object")
170                for name, value in pragma.items():
171                    self._pragma(name, value, info)
172            else:
173                if cur_doc and not cur_doc.symbol:
174                    raise QAPISemError(
175                        cur_doc.info, "definition documentation required")
176                self._add_expr(expr, info, cur_doc)
177            cur_doc = None
178        self.reject_expr_doc(cur_doc)
179
180    def _add_expr(self, expr: Mapping[str, object],
181                  info: QAPISourceInfo,
182                  doc: Optional['QAPIDoc'] = None) -> None:
183        self.exprs.append(QAPIExpression(expr, info, doc))
184
185    @staticmethod
186    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
187        if doc and doc.symbol:
188            raise QAPISemError(
189                doc.info,
190                "documentation for '%s' is not followed by the definition"
191                % doc.symbol)
192
193    @staticmethod
194    def _include(include: str,
195                 info: QAPISourceInfo,
196                 incl_fname: str,
197                 previously_included: Set[str]
198                 ) -> Optional['QAPISchemaParser']:
199        incl_abs_fname = os.path.abspath(incl_fname)
200        # catch inclusion cycle
201        inf: Optional[QAPISourceInfo] = info
202        while inf:
203            if incl_abs_fname == os.path.abspath(inf.fname):
204                raise QAPISemError(info, "inclusion loop for %s" % include)
205            inf = inf.parent
206
207        # skip multiple include of the same file
208        if incl_abs_fname in previously_included:
209            return None
210
211        try:
212            return QAPISchemaParser(incl_fname, previously_included, info)
213        except OSError as err:
214            raise QAPISemError(
215                info,
216                f"can't read include file '{incl_fname}': {err.strerror}"
217            ) from err
218
219    @staticmethod
220    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
221
222        def check_list_str(name: str, value: object) -> List[str]:
223            if (not isinstance(value, list) or
224                    any(not isinstance(elt, str) for elt in value)):
225                raise QAPISemError(
226                    info,
227                    "pragma %s must be a list of strings" % name)
228            return value
229
230        pragma = info.pragma
231
232        if name == 'doc-required':
233            if not isinstance(value, bool):
234                raise QAPISemError(info,
235                                   "pragma 'doc-required' must be boolean")
236            pragma.doc_required = value
237        elif name == 'command-name-exceptions':
238            pragma.command_name_exceptions = check_list_str(name, value)
239        elif name == 'command-returns-exceptions':
240            pragma.command_returns_exceptions = check_list_str(name, value)
241        elif name == 'documentation-exceptions':
242            pragma.documentation_exceptions = check_list_str(name, value)
243        elif name == 'member-name-exceptions':
244            pragma.member_name_exceptions = check_list_str(name, value)
245        else:
246            raise QAPISemError(info, "unknown pragma '%s'" % name)
247
    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.

        :raise QAPIParseError:
            For an unterminated string, an unknown escape or a
            non-printable/non-ASCII character in a string, and for
            any stray character that starts no token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            # Tentatively take one character as the token; .cursor then
            # points at the character after it.
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    # ("##" is returned even when skip_comment was True)
                    skip_comment = False
                # Leave .cursor on the newline ending the comment, so
                # the newline is lexed next and updates .info/.line_pos.
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
                # Skipped comment: loop around for the next token
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        # Strings cannot span lines
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        # Backslash starts an escape; it adds nothing
                        # to the string by itself.
                        esc = True
                        continue
                    elif ch == "'":
                        # Closing quote
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # One character is consumed already; skip the other 3
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # One character is consumed already; skip the other 4
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # The buffer's last newline marks EOF
                    self.tok = None
                    return
                # Start of a new line: advance the source location and
                # remember where the line begins.
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\']+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))
            # Any other whitespace is skipped by looping around
355
356    def get_members(self) -> Dict[str, object]:
357        expr: Dict[str, object] = OrderedDict()
358        if self.tok == '}':
359            self.accept()
360            return expr
361        if self.tok != "'":
362            raise QAPIParseError(self, "expected string or '}'")
363        while True:
364            key = self.val
365            assert isinstance(key, str)  # Guaranteed by tok == "'"
366
367            self.accept()
368            if self.tok != ':':
369                raise QAPIParseError(self, "expected ':'")
370            self.accept()
371            if key in expr:
372                raise QAPIParseError(self, "duplicate key '%s'" % key)
373            expr[key] = self.get_expr()
374            if self.tok == '}':
375                self.accept()
376                return expr
377            if self.tok != ',':
378                raise QAPIParseError(self, "expected ',' or '}'")
379            self.accept()
380            if self.tok != "'":
381                raise QAPIParseError(self, "expected string")
382
383    def get_values(self) -> List[object]:
384        expr: List[object] = []
385        if self.tok == ']':
386            self.accept()
387            return expr
388        if self.tok not in tuple("{['tf"):
389            raise QAPIParseError(
390                self, "expected '{', '[', ']', string, or boolean")
391        while True:
392            expr.append(self.get_expr())
393            if self.tok == ']':
394                self.accept()
395                return expr
396            if self.tok != ',':
397                raise QAPIParseError(self, "expected ',' or ']'")
398            self.accept()
399
400    def get_expr(self) -> _ExprValue:
401        expr: _ExprValue
402        if self.tok == '{':
403            self.accept()
404            expr = self.get_members()
405        elif self.tok == '[':
406            self.accept()
407            expr = self.get_values()
408        elif self.tok in tuple("'tf"):
409            assert isinstance(self.val, (str, bool))
410            expr = self.val
411            self.accept()
412        else:
413            raise QAPIParseError(
414                self, "expected '{', '[', string, or boolean")
415        return expr
416
417    def get_doc_line(self) -> Optional[str]:
418        if self.tok != '#':
419            raise QAPIParseError(
420                self, "documentation comment must end with '##'")
421        assert isinstance(self.val, str)
422        if self.val.startswith('##'):
423            # End of doc comment
424            if self.val != '##':
425                raise QAPIParseError(
426                    self, "junk after '##' at end of documentation comment")
427            return None
428        if self.val == '#':
429            return ''
430        if self.val[1] != ' ':
431            raise QAPIParseError(self, "missing space after #")
432        return self.val[2:].rstrip()
433
434    @staticmethod
435    def _match_at_name_colon(string: str) -> Optional[Match[str]]:
436        return re.match(r'@([^:]*): *', string)
437
438    def get_doc_indented(self, doc: 'QAPIDoc') -> Optional[str]:
439        self.accept(False)
440        line = self.get_doc_line()
441        while line == '':
442            doc.append_line(line)
443            self.accept(False)
444            line = self.get_doc_line()
445        if line is None:
446            return line
447        indent = must_match(r'\s*', line).end()
448        if not indent:
449            return line
450        doc.append_line(line[indent:])
451        prev_line_blank = False
452        while True:
453            self.accept(False)
454            line = self.get_doc_line()
455            if line is None:
456                return line
457            if self._match_at_name_colon(line):
458                return line
459            cur_indent = must_match(r'\s*', line).end()
460            if line != '' and cur_indent < indent:
461                if prev_line_blank:
462                    return line
463                raise QAPIParseError(
464                    self,
465                    "unexpected de-indent (expected at least %d spaces)" %
466                    indent)
467            doc.append_line(line[indent:])
468            prev_line_blank = True
469
470    def get_doc_paragraph(self, doc: 'QAPIDoc') -> Optional[str]:
471        while True:
472            self.accept(False)
473            line = self.get_doc_line()
474            if line is None:
475                return line
476            if line == '':
477                return line
478            doc.append_line(line)
479
480    def get_doc(self) -> 'QAPIDoc':
481        if self.val != '##':
482            raise QAPIParseError(
483                self, "junk after '##' at start of documentation comment")
484        info = self.info
485        self.accept(False)
486        line = self.get_doc_line()
487        if line is not None and line.startswith('@'):
488            # Definition documentation
489            if not line.endswith(':'):
490                raise QAPIParseError(self, "line should end with ':'")
491            # Invalid names are not checked here, but the name
492            # provided *must* match the following definition,
493            # which *is* validated in expr.py.
494            symbol = line[1:-1]
495            if not symbol:
496                raise QAPIParseError(self, "name required after '@'")
497            doc = QAPIDoc(info, symbol)
498            self.accept(False)
499            line = self.get_doc_line()
500            no_more_args = False
501
502            while line is not None:
503                # Blank lines
504                while line == '':
505                    self.accept(False)
506                    line = self.get_doc_line()
507                if line is None:
508                    break
509                # Non-blank line, first of a section
510                if line == 'Features:':
511                    if doc.features:
512                        raise QAPIParseError(
513                            self, "duplicated 'Features:' line")
514                    self.accept(False)
515                    line = self.get_doc_line()
516                    while line == '':
517                        self.accept(False)
518                        line = self.get_doc_line()
519                    while (line is not None
520                           and (match := self._match_at_name_colon(line))):
521                        doc.new_feature(self.info, match.group(1))
522                        text = line[match.end():]
523                        if text:
524                            doc.append_line(text)
525                        line = self.get_doc_indented(doc)
526                    if not doc.features:
527                        raise QAPIParseError(
528                            self, 'feature descriptions expected')
529                    no_more_args = True
530                elif match := self._match_at_name_colon(line):
531                    # description
532                    if no_more_args:
533                        raise QAPIParseError(
534                            self,
535                            "description of '@%s:' follows a section"
536                            % match.group(1))
537                    while (line is not None
538                           and (match := self._match_at_name_colon(line))):
539                        doc.new_argument(self.info, match.group(1))
540                        text = line[match.end():]
541                        if text:
542                            doc.append_line(text)
543                        line = self.get_doc_indented(doc)
544                    no_more_args = True
545                elif match := re.match(
546                        r'(Returns|Errors|Since|Notes?|Examples?|TODO): *',
547                        line):
548                    # tagged section
549                    doc.new_tagged_section(self.info, match.group(1))
550                    text = line[match.end():]
551                    if text:
552                        doc.append_line(text)
553                    line = self.get_doc_indented(doc)
554                    no_more_args = True
555                elif line.startswith('='):
556                    raise QAPIParseError(
557                        self,
558                        "unexpected '=' markup in definition documentation")
559                else:
560                    # tag-less paragraph
561                    doc.ensure_untagged_section(self.info)
562                    doc.append_line(line)
563                    line = self.get_doc_paragraph(doc)
564        else:
565            # Free-form documentation
566            doc = QAPIDoc(info)
567            doc.ensure_untagged_section(self.info)
568            first = True
569            while line is not None:
570                if match := self._match_at_name_colon(line):
571                    raise QAPIParseError(
572                        self,
573                        "'@%s:' not allowed in free-form documentation"
574                        % match.group(1))
575                if line.startswith('='):
576                    if not first:
577                        raise QAPIParseError(
578                            self,
579                            "'=' heading must come first in a comment block")
580                doc.append_line(line)
581                self.accept(False)
582                line = self.get_doc_line()
583                first = False
584
585        self.accept(False)
586        doc.end()
587        return doc
588
589
590class QAPIDoc:
591    """
592    A documentation comment block, either definition or free-form
593
594    Definition documentation blocks consist of
595
596    * a body section: one line naming the definition, followed by an
597      overview (any number of lines)
598
599    * argument sections: a description of each argument (for commands
600      and events) or member (for structs, unions and alternates)
601
602    * features sections: a description of each feature flag
603
604    * additional (non-argument) sections, possibly tagged
605
606    Free-form documentation blocks consist only of a body section.
607    """
608
609    class Section:
610        def __init__(self, info: QAPISourceInfo,
611                     tag: Optional[str] = None):
612            # section source info, i.e. where it begins
613            self.info = info
614            # section tag, if any ('Returns', '@name', ...)
615            self.tag = tag
616            # section text without tag
617            self.text = ''
618
619        def append_line(self, line: str) -> None:
620            self.text += line + '\n'
621
622    class ArgSection(Section):
623        def __init__(self, info: QAPISourceInfo, tag: str):
624            super().__init__(info, tag)
625            self.member: Optional['QAPISchemaMember'] = None
626
627        def connect(self, member: 'QAPISchemaMember') -> None:
628            self.member = member
629
630    def __init__(self, info: QAPISourceInfo, symbol: Optional[str] = None):
631        # info points to the doc comment block's first line
632        self.info = info
633        # definition doc's symbol, None for free-form doc
634        self.symbol: Optional[str] = symbol
635        # the sections in textual order
636        self.all_sections: List[QAPIDoc.Section] = [QAPIDoc.Section(info)]
637        # the body section
638        self.body: Optional[QAPIDoc.Section] = self.all_sections[0]
639        # dicts mapping parameter/feature names to their description
640        self.args: Dict[str, QAPIDoc.ArgSection] = {}
641        self.features: Dict[str, QAPIDoc.ArgSection] = {}
642        # a command's "Returns" and "Errors" section
643        self.returns: Optional[QAPIDoc.Section] = None
644        self.errors: Optional[QAPIDoc.Section] = None
645        # "Since" section
646        self.since: Optional[QAPIDoc.Section] = None
647        # sections other than .body, .args, .features
648        self.sections: List[QAPIDoc.Section] = []
649
650    def end(self) -> None:
651        for section in self.all_sections:
652            section.text = section.text.strip('\n')
653            if section.tag is not None and section.text == '':
654                raise QAPISemError(
655                    section.info, "text required after '%s:'" % section.tag)
656
657    def ensure_untagged_section(self, info: QAPISourceInfo) -> None:
658        if self.all_sections and not self.all_sections[-1].tag:
659            # extend current section
660            self.all_sections[-1].text += '\n'
661            return
662        # start new section
663        section = self.Section(info)
664        self.sections.append(section)
665        self.all_sections.append(section)
666
667    def new_tagged_section(self, info: QAPISourceInfo, tag: str) -> None:
668        section = self.Section(info, tag)
669        if tag == 'Returns':
670            if self.returns:
671                raise QAPISemError(
672                    info, "duplicated '%s' section" % tag)
673            self.returns = section
674        elif tag == 'Errors':
675            if self.errors:
676                raise QAPISemError(
677                    info, "duplicated '%s' section" % tag)
678            self.errors = section
679        elif tag == 'Since':
680            if self.since:
681                raise QAPISemError(
682                    info, "duplicated '%s' section" % tag)
683            self.since = section
684        self.sections.append(section)
685        self.all_sections.append(section)
686
687    def _new_description(self, info: QAPISourceInfo, name: str,
688                         desc: Dict[str, ArgSection]) -> None:
689        if not name:
690            raise QAPISemError(info, "invalid parameter name")
691        if name in desc:
692            raise QAPISemError(info, "'%s' parameter name duplicated" % name)
693        section = self.ArgSection(info, '@' + name)
694        self.all_sections.append(section)
695        desc[name] = section
696
697    def new_argument(self, info: QAPISourceInfo, name: str) -> None:
698        self._new_description(info, name, self.args)
699
700    def new_feature(self, info: QAPISourceInfo, name: str) -> None:
701        self._new_description(info, name, self.features)
702
703    def append_line(self, line: str) -> None:
704        self.all_sections[-1].append_line(line)
705
706    def connect_member(self, member: 'QAPISchemaMember') -> None:
707        if member.name not in self.args:
708            if self.symbol not in member.info.pragma.documentation_exceptions:
709                raise QAPISemError(member.info,
710                                   "%s '%s' lacks documentation"
711                                   % (member.role, member.name))
712            self.args[member.name] = QAPIDoc.ArgSection(
713                self.info, '@' + member.name)
714        self.args[member.name].connect(member)
715
716    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
717        if feature.name not in self.features:
718            raise QAPISemError(feature.info,
719                               "feature '%s' lacks documentation"
720                               % feature.name)
721        self.features[feature.name].connect(feature)
722
723    def check_expr(self, expr: QAPIExpression) -> None:
724        if 'command' in expr:
725            if self.returns and 'returns' not in expr:
726                raise QAPISemError(
727                    self.returns.info,
728                    "'Returns' section, but command doesn't return anything")
729        else:
730            if self.returns:
731                raise QAPISemError(
732                    self.returns.info,
733                    "'Returns' section is only valid for commands")
734            if self.errors:
735                raise QAPISemError(
736                    self.returns.info,
737                    "'Errors' section is only valid for commands")
738
739    def check(self) -> None:
740
741        def check_args_section(
742                args: Dict[str, QAPIDoc.ArgSection], what: str
743        ) -> None:
744            bogus = [name for name, section in args.items()
745                     if not section.member]
746            if bogus:
747                raise QAPISemError(
748                    args[bogus[0]].info,
749                    "documented %s%s '%s' %s not exist" % (
750                        what,
751                        "s" if len(bogus) > 1 else "",
752                        "', '".join(bogus),
753                        "do" if len(bogus) > 1 else "does"
754                    ))
755
756        check_args_section(self.args, 'member')
757        check_args_section(self.features, 'feature')
758