xref: /qemu/scripts/qapi/parser.py (revision 824f4bac)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20from typing import (
21    Dict,
22    List,
23    Optional,
24    Set,
25    Union,
26)
27
28from .common import must_match
29from .error import QAPISemError, QAPISourceError
30from .source import QAPISourceInfo
31
32
# Type alias for the values get_expr() can return: a list, a dict
# keyed by strings, a string, or a bool.
_ExprValue = Union[List[object], Dict[str, object], str, bool]
35
36
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error is reported at the parser's current source location
    (``parser.info``).  The column is computed from the text between
    the start of the current line (``parser.line_pos``) and the start
    of the current token (``parser.pos``).

    :param parser: The parser that detected the error.
    :param msg: The error message.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop; with 1-based columns
                # and a tab width of 8 these are 9, 17, 25, ...
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
47
48
49class QAPISchemaParser:
50    """
51    Parse QAPI schema source.
52
53    Parse a JSON-esque schema file and process directives.  See
54    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
55    Grammatical validation is handled later by `expr.check_exprs()`.
56
57    :param fname: Source file name.
58    :param previously_included:
59        The absolute names of previously included source files,
60        if being invoked from another parser.
61    :param incl_info:
62       `QAPISourceInfo` belonging to the parent module.
63       ``None`` implies this is the root module.
64
65    :ivar exprs: Resulting parsed expressions.
66    :ivar docs: Resulting parsed documentation blocks.
67
68    :raise OSError: For problems reading the root schema document.
69    :raise QAPIError: For errors in the schema source.
70    """
71    def __init__(self,
72                 fname: str,
73                 previously_included: Optional[Set[str]] = None,
74                 incl_info: Optional[QAPISourceInfo] = None):
75        self._fname = fname
76        self._included = previously_included or set()
77        self._included.add(os.path.abspath(self._fname))
78        self.src = ''
79
80        # Lexer state (see `accept` for details):
81        self.info = QAPISourceInfo(self._fname, incl_info)
82        self.tok: Union[None, str] = None
83        self.pos = 0
84        self.cursor = 0
85        self.val: Optional[Union[bool, str]] = None
86        self.line_pos = 0
87
88        # Parser output:
89        self.exprs: List[Dict[str, object]] = []
90        self.docs: List[QAPIDoc] = []
91
92        # Showtime!
93        self._parse()
94
95    def _parse(self) -> None:
96        """
97        Parse the QAPI schema document.
98
99        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
100        """
101        cur_doc = None
102
103        # May raise OSError; allow the caller to handle it.
104        with open(self._fname, 'r', encoding='utf-8') as fp:
105            self.src = fp.read()
106        if self.src == '' or self.src[-1] != '\n':
107            self.src += '\n'
108
109        # Prime the lexer:
110        self.accept()
111
112        # Parse until done:
113        while self.tok is not None:
114            info = self.info
115            if self.tok == '#':
116                self.reject_expr_doc(cur_doc)
117                for cur_doc in self.get_doc(info):
118                    self.docs.append(cur_doc)
119                continue
120
121            expr = self.get_expr()
122            if not isinstance(expr, dict):
123                raise QAPISemError(
124                    info, "top-level expression must be an object")
125
126            if 'include' in expr:
127                self.reject_expr_doc(cur_doc)
128                if len(expr) != 1:
129                    raise QAPISemError(info, "invalid 'include' directive")
130                include = expr['include']
131                if not isinstance(include, str):
132                    raise QAPISemError(info,
133                                       "value of 'include' must be a string")
134                incl_fname = os.path.join(os.path.dirname(self._fname),
135                                          include)
136                self.exprs.append({'expr': {'include': incl_fname},
137                                   'info': info})
138                exprs_include = self._include(include, info, incl_fname,
139                                              self._included)
140                if exprs_include:
141                    self.exprs.extend(exprs_include.exprs)
142                    self.docs.extend(exprs_include.docs)
143            elif "pragma" in expr:
144                self.reject_expr_doc(cur_doc)
145                if len(expr) != 1:
146                    raise QAPISemError(info, "invalid 'pragma' directive")
147                pragma = expr['pragma']
148                if not isinstance(pragma, dict):
149                    raise QAPISemError(
150                        info, "value of 'pragma' must be an object")
151                for name, value in pragma.items():
152                    self._pragma(name, value, info)
153            else:
154                expr_elem = {'expr': expr,
155                             'info': info}
156                if cur_doc:
157                    if not cur_doc.symbol:
158                        raise QAPISemError(
159                            cur_doc.info, "definition documentation required")
160                    expr_elem['doc'] = cur_doc
161                self.exprs.append(expr_elem)
162            cur_doc = None
163        self.reject_expr_doc(cur_doc)
164
165    @staticmethod
166    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
167        if doc and doc.symbol:
168            raise QAPISemError(
169                doc.info,
170                "documentation for '%s' is not followed by the definition"
171                % doc.symbol)
172
173    @staticmethod
174    def _include(include: str,
175                 info: QAPISourceInfo,
176                 incl_fname: str,
177                 previously_included: Set[str]
178                 ) -> Optional['QAPISchemaParser']:
179        incl_abs_fname = os.path.abspath(incl_fname)
180        # catch inclusion cycle
181        inf: Optional[QAPISourceInfo] = info
182        while inf:
183            if incl_abs_fname == os.path.abspath(inf.fname):
184                raise QAPISemError(info, "inclusion loop for %s" % include)
185            inf = inf.parent
186
187        # skip multiple include of the same file
188        if incl_abs_fname in previously_included:
189            return None
190
191        try:
192            return QAPISchemaParser(incl_fname, previously_included, info)
193        except OSError as err:
194            raise QAPISemError(
195                info,
196                f"can't read include file '{incl_fname}': {err.strerror}"
197            ) from err
198
199    @staticmethod
200    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
201
202        def check_list_str(name: str, value: object) -> List[str]:
203            if (not isinstance(value, list) or
204                    any(not isinstance(elt, str) for elt in value)):
205                raise QAPISemError(
206                    info,
207                    "pragma %s must be a list of strings" % name)
208            return value
209
210        pragma = info.pragma
211
212        if name == 'doc-required':
213            if not isinstance(value, bool):
214                raise QAPISemError(info,
215                                   "pragma 'doc-required' must be boolean")
216            pragma.doc_required = value
217        elif name == 'command-name-exceptions':
218            pragma.command_name_exceptions = check_list_str(name, value)
219        elif name == 'command-returns-exceptions':
220            pragma.command_returns_exceptions = check_list_str(name, value)
221        elif name == 'member-name-exceptions':
222            pragma.member_name_exceptions = check_list_str(name, value)
223        else:
224            raise QAPISemError(info, "unknown pragma '%s'" % name)
225
    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.
            - ``.cursor`` is the buffer index where scanning resumes.
            - ``.line_pos`` is the buffer index of the first character
              of the current line.

        :raise QAPIParseError:
            For an unterminated string, an unknown escape or a funny
            character in a string, and for stray characters.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.  A comment starting with "##" is
            always returned, regardless of ``skip_comment``.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        # Each iteration examines one character.  The loop continues
        # after skipped comments, newlines and other whitespace, and
        # returns as soon as a token has been recognized.
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Move to the newline ending the comment.  There always
                # is one: _parse() makes the text end with a newline.
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The 't' is already consumed; skip the remaining 'rue'.
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                # The 'f' is already consumed; skip the remaining 'alse'.
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    # This was the final newline: end of file
                    self.tok = None
                    return
                # Start tracking the next line.
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\'"]+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))
333
334    def get_members(self) -> Dict[str, object]:
335        expr: Dict[str, object] = OrderedDict()
336        if self.tok == '}':
337            self.accept()
338            return expr
339        if self.tok != "'":
340            raise QAPIParseError(self, "expected string or '}'")
341        while True:
342            key = self.val
343            assert isinstance(key, str)  # Guaranteed by tok == "'"
344
345            self.accept()
346            if self.tok != ':':
347                raise QAPIParseError(self, "expected ':'")
348            self.accept()
349            if key in expr:
350                raise QAPIParseError(self, "duplicate key '%s'" % key)
351            expr[key] = self.get_expr()
352            if self.tok == '}':
353                self.accept()
354                return expr
355            if self.tok != ',':
356                raise QAPIParseError(self, "expected ',' or '}'")
357            self.accept()
358            if self.tok != "'":
359                raise QAPIParseError(self, "expected string")
360
361    def get_values(self) -> List[object]:
362        expr: List[object] = []
363        if self.tok == ']':
364            self.accept()
365            return expr
366        if self.tok not in tuple("{['tf"):
367            raise QAPIParseError(
368                self, "expected '{', '[', ']', string, or boolean")
369        while True:
370            expr.append(self.get_expr())
371            if self.tok == ']':
372                self.accept()
373                return expr
374            if self.tok != ',':
375                raise QAPIParseError(self, "expected ',' or ']'")
376            self.accept()
377
378    def get_expr(self) -> _ExprValue:
379        expr: _ExprValue
380        if self.tok == '{':
381            self.accept()
382            expr = self.get_members()
383        elif self.tok == '[':
384            self.accept()
385            expr = self.get_values()
386        elif self.tok in tuple("'tf"):
387            assert isinstance(self.val, (str, bool))
388            expr = self.val
389            self.accept()
390        else:
391            raise QAPIParseError(
392                self, "expected '{', '[', string, or boolean")
393        return expr
394
395    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
396        if self.val != '##':
397            raise QAPIParseError(
398                self, "junk after '##' at start of documentation comment")
399
400        docs = []
401        cur_doc = QAPIDoc(self, info)
402        self.accept(False)
403        while self.tok == '#':
404            assert isinstance(self.val, str)
405            if self.val.startswith('##'):
406                # End of doc comment
407                if self.val != '##':
408                    raise QAPIParseError(
409                        self,
410                        "junk after '##' at end of documentation comment")
411                cur_doc.end_comment()
412                docs.append(cur_doc)
413                self.accept()
414                return docs
415            if self.val.startswith('# ='):
416                if cur_doc.symbol:
417                    raise QAPIParseError(
418                        self,
419                        "unexpected '=' markup in definition documentation")
420                if cur_doc.body.text:
421                    cur_doc.end_comment()
422                    docs.append(cur_doc)
423                    cur_doc = QAPIDoc(self, info)
424            cur_doc.append(self.val)
425            self.accept(False)
426
427        raise QAPIParseError(self, "documentation comment must end with '##'")
428
429
430class QAPIDoc:
431    """
432    A documentation comment block, either definition or free-form
433
434    Definition documentation blocks consist of
435
436    * a body section: one line naming the definition, followed by an
437      overview (any number of lines)
438
439    * argument sections: a description of each argument (for commands
440      and events) or member (for structs, unions and alternates)
441
442    * features sections: a description of each feature flag
443
444    * additional (non-argument) sections, possibly tagged
445
446    Free-form documentation blocks consist only of a body section.
447    """
448
449    class Section:
450        def __init__(self, parser, name=None, indent=0):
451            # parser, for error messages about indentation
452            self._parser = parser
453            # optional section name (argument/member or section name)
454            self.name = name
455            self.text = ''
456            # the expected indent level of the text of this section
457            self._indent = indent
458
459        def append(self, line):
460            # Strip leading spaces corresponding to the expected indent level
461            # Blank lines are always OK.
462            if line:
463                indent = must_match(r'\s*', line).end()
464                if indent < self._indent:
465                    raise QAPIParseError(
466                        self._parser,
467                        "unexpected de-indent (expected at least %d spaces)" %
468                        self._indent)
469                line = line[self._indent:]
470
471            self.text += line.rstrip() + '\n'
472
473    class ArgSection(Section):
474        def __init__(self, parser, name, indent=0):
475            super().__init__(parser, name, indent)
476            self.member = None
477
478        def connect(self, member):
479            self.member = member
480
481    def __init__(self, parser, info):
482        # self._parser is used to report errors with QAPIParseError.  The
483        # resulting error position depends on the state of the parser.
484        # It happens to be the beginning of the comment.  More or less
485        # servicable, but action at a distance.
486        self._parser = parser
487        self.info = info
488        self.symbol = None
489        self.body = QAPIDoc.Section(parser)
490        # dict mapping parameter name to ArgSection
491        self.args = OrderedDict()
492        self.features = OrderedDict()
493        # a list of Section
494        self.sections = []
495        # the current section
496        self._section = self.body
497        self._append_line = self._append_body_line
498
499    def has_section(self, name):
500        """Return True if we have a section with this name."""
501        for i in self.sections:
502            if i.name == name:
503                return True
504        return False
505
506    def append(self, line):
507        """
508        Parse a comment line and add it to the documentation.
509
510        The way that the line is dealt with depends on which part of
511        the documentation we're parsing right now:
512        * The body section: ._append_line is ._append_body_line
513        * An argument section: ._append_line is ._append_args_line
514        * A features section: ._append_line is ._append_features_line
515        * An additional section: ._append_line is ._append_various_line
516        """
517        line = line[1:]
518        if not line:
519            self._append_freeform(line)
520            return
521
522        if line[0] != ' ':
523            raise QAPIParseError(self._parser, "missing space after #")
524        line = line[1:]
525        self._append_line(line)
526
    def end_comment(self):
        """Finish the documentation block by ending the current section."""
        self._end_section()
529
530    @staticmethod
531    def _is_section_tag(name):
532        return name in ('Returns:', 'Since:',
533                        # those are often singular or plural
534                        'Note:', 'Notes:',
535                        'Example:', 'Examples:',
536                        'TODO:')
537
538    def _append_body_line(self, line):
539        """
540        Process a line of documentation text in the body section.
541
542        If this a symbol line and it is the section's first line, this
543        is a definition documentation block for that symbol.
544
545        If it's a definition documentation block, another symbol line
546        begins the argument section for the argument named by it, and
547        a section tag begins an additional section.  Start that
548        section and append the line to it.
549
550        Else, append the line to the current section.
551        """
552        name = line.split(' ', 1)[0]
553        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
554        # recognized, and get silently treated as ordinary text
555        if not self.symbol and not self.body.text and line.startswith('@'):
556            if not line.endswith(':'):
557                raise QAPIParseError(self._parser, "line should end with ':'")
558            self.symbol = line[1:-1]
559            # FIXME invalid names other than the empty string aren't flagged
560            if not self.symbol:
561                raise QAPIParseError(self._parser, "invalid name")
562        elif self.symbol:
563            # This is a definition documentation block
564            if name.startswith('@') and name.endswith(':'):
565                self._append_line = self._append_args_line
566                self._append_args_line(line)
567            elif line == 'Features:':
568                self._append_line = self._append_features_line
569            elif self._is_section_tag(name):
570                self._append_line = self._append_various_line
571                self._append_various_line(line)
572            else:
573                self._append_freeform(line)
574        else:
575            # This is a free-form documentation block
576            self._append_freeform(line)
577
578    def _append_args_line(self, line):
579        """
580        Process a line of documentation text in an argument section.
581
582        A symbol line begins the next argument section, a section tag
583        section or a non-indented line after a blank line begins an
584        additional section.  Start that section and append the line to
585        it.
586
587        Else, append the line to the current section.
588
589        """
590        name = line.split(' ', 1)[0]
591
592        if name.startswith('@') and name.endswith(':'):
593            # If line is "@arg:   first line of description", find
594            # the index of 'f', which is the indent we expect for any
595            # following lines.  We then remove the leading "@arg:"
596            # from line and replace it with spaces so that 'f' has the
597            # same index as it did in the original line and can be
598            # handled the same way we will handle following lines.
599            indent = must_match(r'@\S*:\s*', line).end()
600            line = line[indent:]
601            if not line:
602                # Line was just the "@arg:" header; following lines
603                # are not indented
604                indent = 0
605            else:
606                line = ' ' * indent + line
607            self._start_args_section(name[1:-1], indent)
608        elif self._is_section_tag(name):
609            self._append_line = self._append_various_line
610            self._append_various_line(line)
611            return
612        elif (self._section.text.endswith('\n\n')
613              and line and not line[0].isspace()):
614            if line == 'Features:':
615                self._append_line = self._append_features_line
616            else:
617                self._start_section()
618                self._append_line = self._append_various_line
619                self._append_various_line(line)
620            return
621
622        self._append_freeform(line)
623
624    def _append_features_line(self, line):
625        name = line.split(' ', 1)[0]
626
627        if name.startswith('@') and name.endswith(':'):
628            # If line is "@arg:   first line of description", find
629            # the index of 'f', which is the indent we expect for any
630            # following lines.  We then remove the leading "@arg:"
631            # from line and replace it with spaces so that 'f' has the
632            # same index as it did in the original line and can be
633            # handled the same way we will handle following lines.
634            indent = must_match(r'@\S*:\s*', line).end()
635            line = line[indent:]
636            if not line:
637                # Line was just the "@arg:" header; following lines
638                # are not indented
639                indent = 0
640            else:
641                line = ' ' * indent + line
642            self._start_features_section(name[1:-1], indent)
643        elif self._is_section_tag(name):
644            self._append_line = self._append_various_line
645            self._append_various_line(line)
646            return
647        elif (self._section.text.endswith('\n\n')
648              and line and not line[0].isspace()):
649            self._start_section()
650            self._append_line = self._append_various_line
651            self._append_various_line(line)
652            return
653
654        self._append_freeform(line)
655
656    def _append_various_line(self, line):
657        """
658        Process a line of documentation text in an additional section.
659
660        A symbol line is an error.
661
662        A section tag begins an additional section.  Start that
663        section and append the line to it.
664
665        Else, append the line to the current section.
666        """
667        name = line.split(' ', 1)[0]
668
669        if name.startswith('@') and name.endswith(':'):
670            raise QAPIParseError(self._parser,
671                                 "'%s' can't follow '%s' section"
672                                 % (name, self.sections[0].name))
673        if self._is_section_tag(name):
674            # If line is "Section:   first line of description", find
675            # the index of 'f', which is the indent we expect for any
676            # following lines.  We then remove the leading "Section:"
677            # from line and replace it with spaces so that 'f' has the
678            # same index as it did in the original line and can be
679            # handled the same way we will handle following lines.
680            indent = must_match(r'\S*:\s*', line).end()
681            line = line[indent:]
682            if not line:
683                # Line was just the "Section:" header; following lines
684                # are not indented
685                indent = 0
686            else:
687                line = ' ' * indent + line
688            self._start_section(name[:-1], indent)
689
690        self._append_freeform(line)
691
692    def _start_symbol_section(self, symbols_dict, name, indent):
693        # FIXME invalid names other than the empty string aren't flagged
694        if not name:
695            raise QAPIParseError(self._parser, "invalid parameter name")
696        if name in symbols_dict:
697            raise QAPIParseError(self._parser,
698                                 "'%s' parameter name duplicated" % name)
699        assert not self.sections
700        self._end_section()
701        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
702        symbols_dict[name] = self._section
703
    def _start_args_section(self, name, indent):
        """Start the section for argument ``name``, stored in ``.args``."""
        self._start_symbol_section(self.args, name, indent)
706
    def _start_features_section(self, name, indent):
        """Start the section for feature ``name``, stored in ``.features``."""
        self._start_symbol_section(self.features, name, indent)
709
710    def _start_section(self, name=None, indent=0):
711        if name in ('Returns', 'Since') and self.has_section(name):
712            raise QAPIParseError(self._parser,
713                                 "duplicated '%s' section" % name)
714        self._end_section()
715        self._section = QAPIDoc.Section(self._parser, name, indent)
716        self.sections.append(self._section)
717
718    def _end_section(self):
719        if self._section:
720            text = self._section.text = self._section.text.strip()
721            if self._section.name and (not text or text.isspace()):
722                raise QAPIParseError(
723                    self._parser,
724                    "empty doc section '%s'" % self._section.name)
725            self._section = None
726
727    def _append_freeform(self, line):
728        match = re.match(r'(@\S+:)', line)
729        if match:
730            raise QAPIParseError(self._parser,
731                                 "'%s' not allowed in free-form documentation"
732                                 % match.group(1))
733        self._section.append(line)
734
735    def connect_member(self, member):
736        if member.name not in self.args:
737            # Undocumented TODO outlaw
738            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
739                                                        member.name)
740        self.args[member.name].connect(member)
741
742    def connect_feature(self, feature):
743        if feature.name not in self.features:
744            raise QAPISemError(feature.info,
745                               "feature '%s' lacks documentation"
746                               % feature.name)
747        self.features[feature.name].connect(feature)
748
749    def check_expr(self, expr):
750        if self.has_section('Returns') and 'command' not in expr:
751            raise QAPISemError(self.info,
752                               "'Returns:' is only valid for commands")
753
754    def check(self):
755
756        def check_args_section(args, info, what):
757            bogus = [name for name, section in args.items()
758                     if not section.member]
759            if bogus:
760                raise QAPISemError(
761                    self.info,
762                    "documented member%s '%s' %s not exist"
763                    % ("s" if len(bogus) > 1 else "",
764                       "', '".join(bogus),
765                       "do" if len(bogus) > 1 else "does"))
766
767        check_args_section(self.args, self.info, 'members')
768        check_args_section(self.features, self.info, 'features')
769