xref: /qemu/scripts/qapi/parser.py (revision bcfec376)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPISemError, QAPISourceError
22from .source import QAPISourceInfo
23
24
class QAPIParseError(QAPISourceError):
    """Error class for all QAPI schema parsing errors."""
    def __init__(self, parser, msg):
        """
        Create an error located at the parser's current token.

        The column is computed from the text between the start of the
        current line (parser.line_pos) and the start of the current
        token (parser.pos).  It is 1-based, and a tab advances it to
        the next tab stop (columns 1, 9, 17, ...).

        :param parser: the parser; its src, line_pos, pos and info
                       attributes are read
        :param msg: the error message
        """
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Round up to the next multiple of eight, plus one.
                # The previous "(col + 7) % 8 + 1" never advanced.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
35
36
class QAPISchemaParser:
    """
    Parser for one QAPI schema file and, recursively, its includes.

    Constructing an instance reads and parses the whole file.  The
    results are left in two attributes:

    * exprs: a list of dicts, one per top-level expression other than
      pragma directives, with keys 'expr', 'info' and (when the
      expression is documented) 'doc'

    * docs: a list of the documentation blocks encountered

    Both lists also receive the entries of newly included files.
    """

    def __init__(self, fname, previously_included=None, incl_info=None):
        """
        Read and parse the schema file @fname.

        :param fname: path of the schema file
        :param previously_included: set of absolute file names parsed
            so far; the absolute name of @fname is added to it.  A
            false value (None or an empty set) is replaced by a fresh
            set.
        :param incl_info: source information of the 'include'
            directive that pulled in this file, or None for the
            top-level schema file
        :raise QAPISemError: when the file can't be read, or a
            directive or documentation block is misused
        :raise QAPIParseError: on lexical and syntax errors
        """
        previously_included = previously_included or set()
        previously_included.add(os.path.abspath(fname))

        try:
            # The with statement closes the file even if read() fails
            with open(fname, 'r', encoding='utf-8') as fp:
                self.src = fp.read()
        except IOError as e:
            raise QAPISemError(incl_info or QAPISourceInfo(None, None, None),
                               "can't read %s file '%s': %s"
                               % ("include" if incl_info else "schema",
                                  fname,
                                  e.strerror))

        # The lexer relies on the source ending with a newline
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'
        self.cursor = 0
        self.info = QAPISourceInfo(fname, 1, incl_info)
        self.line_pos = 0
        self.exprs = []
        self.docs = []
        self.accept()
        cur_doc = None

        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                # A documentation block; remember the last one so it
                # can be attached to the expression that follows
                self.reject_expr_doc(cur_doc)
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr(False)
            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include file names are relative to the including file
                incl_fname = os.path.join(os.path.dirname(fname),
                                          include)
                self.exprs.append({'expr': {'include': incl_fname},
                                   'info': info})
                exprs_include = self._include(include, info, incl_fname,
                                              previously_included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                expr_elem = {'expr': expr,
                             'info': info}
                if cur_doc:
                    if not cur_doc.symbol:
                        raise QAPISemError(
                            cur_doc.info, "definition documentation required")
                    expr_elem['doc'] = cur_doc
                self.exprs.append(expr_elem)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    @staticmethod
    def reject_expr_doc(doc):
        """
        Reject definition documentation that has no definition.

        :param doc: a documentation block or None
        :raise QAPISemError: when @doc names a symbol
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    def _include(self, include, info, incl_fname, previously_included):
        """
        Parse the include file @incl_fname unless it was parsed before.

        :param include: the file name as written in the directive,
            used in error messages
        :param info: source information of the 'include' directive
        :param incl_fname: the file name to open
        :param previously_included: set of absolute file names parsed
            so far
        :return: a parser for the include file, or None if the file
            was included before
        :raise QAPISemError: on an inclusion loop
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        return QAPISchemaParser(incl_fname, previously_included, info)

    def _check_pragma_list_of_str(self, name, value, info):
        """
        Check that the value of pragma @name is a list of strings.

        :raise QAPISemError: when @value is anything else
        """
        if (not isinstance(value, list)
                or any(not isinstance(elt, str) for elt in value)):
            raise QAPISemError(
                info,
                "pragma %s must be a list of strings" % name)

    def _pragma(self, name, value, info):
        """
        Validate pragma @name and store @value in info.pragma.

        :raise QAPISemError: on an unknown pragma or a value of the
            wrong type
        """
        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            info.pragma.doc_required = value
        elif name == 'command-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_name_exceptions = value
        elif name == 'command-returns-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.command_returns_exceptions = value
        elif name == 'member-name-exceptions':
            self._check_pragma_list_of_str(name, value, info)
            info.pragma.member_name_exceptions = value
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment=True):
        """
        Read the next token into self.tok and self.val.

        self.tok is the token's first character, or None at the end of
        the input:

        * '#': a comment; self.val is its text up to the end of the
          line.  Ordinary comments are returned only when
          @skip_comment is false, comments starting with '##' always.
        * one of '{}:,[]': self.val is None
        * "'": a string; self.val is its value
        * 't' or 'f': true or false; self.val is the boolean

        self.pos is set to the index of the token's first character.

        :raise QAPIParseError: on a malformed string or a stray
            character
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Leave the cursor on the newline ending the comment
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                string = ''
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = string
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    string += ch
            elif self.src.startswith('true', self.pos):
                self.val = True
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character.  The set must exclude only characters
                # that can't get here, or else the match fails; this
                # is why '"' is not in it.
                match = re.match('[^[\\]{}:,\\s\']+',
                                 self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self):
        """
        Parse the members of an object; the '{' was already consumed.

        :return: an OrderedDict mapping member names to their values
        :raise QAPIParseError: on a syntax error or a duplicate key
        """
        expr = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr(True)
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self):
        """
        Parse the elements of an array; the '[' was already consumed.

        :return: a list of the element values
        :raise QAPIParseError: on a syntax error
        """
        expr = []
        if self.tok == ']':
            self.accept()
            return expr
        # self.tok is None at the end of the input, so test membership
        # in a tuple; "None in <str>" would raise TypeError
        if self.tok not in ('{', '[', "'", 't', 'f'):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr(True))
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self, nested):
        """
        Parse one expression starting at the current token.

        :param nested: false for a top-level expression, which must be
            an object; true to also permit arrays, strings and
            booleans
        :return: an OrderedDict, a list, a str or a bool
        :raise QAPIParseError: on a syntax error
        """
        if self.tok != '{' and not nested:
            raise QAPIParseError(self, "expected '{'")
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        # Tuple membership, because self.tok is None at end of input
        elif self.tok in ("'", 't', 'f'):
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info):
        """
        Parse a documentation comment; the current token is its '##'.

        A line starting with '# =' begins a new documentation block
        when the current block's body already has text, so one comment
        can yield several blocks.

        :param info: source information for the resulting blocks
        :return: a list of documentation blocks
        :raise QAPIParseError: on a malformed documentation comment
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")
314
315
class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """
        A section of a documentation block: optional name plus text.
        """

        def __init__(self, parser, name=None, indent=0):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            self.text = ''
            # the expected indent level of the text of this section
            self._indent = indent

        def append(self, line):
            """
            Append @line to the section's text.

            The section's expected indentation is removed from the
            start of the line, trailing whitespace is dropped, and a
            newline is added.

            :raise QAPIParseError: when a non-empty line is indented
                less than expected
            """
            # Strip leading spaces corresponding to the expected indent level
            # Blank lines are always OK.
            if line:
                indent = re.match(r'\s*', line).end()
                if indent < self._indent:
                    raise QAPIParseError(
                        self._parser,
                        "unexpected de-indent (expected at least %d spaces)" %
                        self._indent)
                line = line[self._indent:]

            self.text += line.rstrip() + '\n'

    class ArgSection(Section):
        """
        A named section that can be connected to a schema member.
        """

        def __init__(self, parser, name, indent=0):
            super().__init__(parser, name, indent)
            # the connected member, None until connect() is called
            self.member = None

        def connect(self, member):
            """Record @member as the object this section documents."""
            self.member = member

    def __init__(self, parser, info):
        """
        Create an empty documentation block.

        :param parser: the parser, used to report errors
        :param info: source information of the block
        """
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        # name of the documented definition, None while none was seen
        self.symbol = None
        self.body = QAPIDoc.Section(parser)
        # dict mapping parameter name to ArgSection
        self.args = OrderedDict()
        # dict mapping feature name to ArgSection
        self.features = OrderedDict()
        # a list of Section
        self.sections = []
        # the current section
        self._section = self.body
        # the method handling the next line, see append()
        self._append_line = self._append_body_line

    def has_section(self, name):
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line):
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line
        """
        # Drop the leading '#'
        line = line[1:]
        if not line:
            self._append_freeform(line)
            return

        if line[0] != ' ':
            raise QAPIParseError(self._parser, "missing space after #")
        line = line[1:]
        self._append_line(line)

    def end_comment(self):
        """Finish the block by ending its current section."""
        self._end_section()

    @staticmethod
    def _is_section_tag(name):
        """Return True if @name is one of the recognized section tags."""
        return name in ('Returns:', 'Since:',
                        # those are often singular or plural
                        'Note:', 'Notes:',
                        'Example:', 'Examples:',
                        'TODO:')

    def _append_body_line(self, line):
        """
        Process a line of documentation text in the body section.

        If this is a symbol line and it is the section's first line,
        this is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # FIXME invalid names other than the empty string aren't flagged
            if not self.symbol:
                raise QAPIParseError(self._parser, "invalid name")
        elif self.symbol:
            # This is a definition documentation block
            if name.startswith('@') and name.endswith(':'):
                self._append_line = self._append_args_line
                self._append_args_line(line)
            elif line == 'Features:':
                self._append_line = self._append_features_line
            elif self._is_section_tag(name):
                self._append_line = self._append_various_line
                self._append_various_line(line)
            else:
                self._append_freeform(line)
        else:
            # This is a free-form documentation block
            self._append_freeform(line)

    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        section or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line to
        it.

        Else, append the line to the current section.

        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section, a section tag
        or a non-indented line after a blank line begins an additional
        section.  Start that section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line):
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            raise QAPIParseError(self._parser,
                                 "'%s' can't follow '%s' section"
                                 % (name, self.sections[0].name))
        if self._is_section_tag(name):
            # If line is "Section:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "Section:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "Section:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_section(name[:-1], indent)

        self._append_freeform(line)

    def _start_symbol_section(self, symbols_dict, name, indent):
        """
        End the current section and start an ArgSection named @name.

        The new section is stored in @symbols_dict under @name.

        :raise QAPIParseError: when @name is empty or already in
            @symbols_dict
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        self._end_section()
        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
        symbols_dict[name] = self._section

    def _start_args_section(self, name, indent):
        """Start the argument section for @name."""
        self._start_symbol_section(self.args, name, indent)

    def _start_features_section(self, name, indent):
        """Start the feature section for @name."""
        self._start_symbol_section(self.features, name, indent)

    def _start_section(self, name=None, indent=0):
        """
        End the current section and start an additional section.

        :raise QAPIParseError: on a second 'Returns' or 'Since'
            section
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        self._end_section()
        self._section = QAPIDoc.Section(self._parser, name, indent)
        self.sections.append(self._section)

    def _end_section(self):
        """
        Strip the current section's text and leave the section.

        :raise QAPIParseError: when a named section has no text
        """
        if self._section:
            text = self._section.text = self._section.text.strip()
            if self._section.name and (not text or text.isspace()):
                raise QAPIParseError(
                    self._parser,
                    "empty doc section '%s'" % self._section.name)
            self._section = None

    def _append_freeform(self, line):
        """
        Append @line to the current section as ordinary text.

        :raise QAPIParseError: when the line starts like a symbol line
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member):
        """
        Connect @member to its argument section.

        An undocumented member gets a new, empty section.
        """
        if member.name not in self.args:
            # Undocumented TODO outlaw
            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                        member.name)
        self.args[member.name].connect(member)

    def connect_feature(self, feature):
        """
        Connect @feature to its feature section.

        :raise QAPISemError: when the feature is not documented
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr):
        """
        Check the block against the expression @expr it documents.

        :raise QAPISemError: when there is a 'Returns' section but
            @expr has no 'command' key
        """
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self):
        """
        Check that every documented member and feature was connected.

        :raise QAPISemError: naming the argument or feature sections
            that have nothing connected to them
        """

        def check_args_section(args, what):
            # @what is the singular noun used in the error message
            bogus = [name for name, section in args.items()
                     if not section.member]
            if bogus:
                raise QAPISemError(
                    self.info,
                    "documented %s%s '%s' %s not exist"
                    % (what,
                       "s" if len(bogus) > 1 else "",
                       "', '".join(bogus),
                       "do" if len(bogus) > 1 else "does"))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')
655