xref: /qemu/scripts/qapi/parser.py (revision dc293f60)
1# -*- coding: utf-8 -*-
2#
3# QAPI schema parser
4#
5# Copyright IBM, Corp. 2011
6# Copyright (c) 2013-2019 Red Hat Inc.
7#
8# Authors:
9#  Anthony Liguori <aliguori@us.ibm.com>
10#  Markus Armbruster <armbru@redhat.com>
11#  Marc-André Lureau <marcandre.lureau@redhat.com>
12#  Kevin Wolf <kwolf@redhat.com>
13#
14# This work is licensed under the terms of the GNU GPL, version 2.
15# See the COPYING file in the top-level directory.
16
17from collections import OrderedDict
18import os
19import re
20
21from .error import QAPIParseError, QAPISemError
22from .source import QAPISourceInfo
23
24
25class QAPISchemaParser:
26
27    def __init__(self, fname, previously_included=None, incl_info=None):
28        previously_included = previously_included or set()
29        previously_included.add(os.path.abspath(fname))
30
31        try:
32            fp = open(fname, 'r', encoding='utf-8')
33            self.src = fp.read()
34        except IOError as e:
35            raise QAPISemError(incl_info or QAPISourceInfo(None, None, None),
36                               "can't read %s file '%s': %s"
37                               % ("include" if incl_info else "schema",
38                                  fname,
39                                  e.strerror))
40
41        if self.src == '' or self.src[-1] != '\n':
42            self.src += '\n'
43        self.cursor = 0
44        self.info = QAPISourceInfo(fname, 1, incl_info)
45        self.line_pos = 0
46        self.exprs = []
47        self.docs = []
48        self.accept()
49        cur_doc = None
50
51        while self.tok is not None:
52            info = self.info
53            if self.tok == '#':
54                self.reject_expr_doc(cur_doc)
55                for cur_doc in self.get_doc(info):
56                    self.docs.append(cur_doc)
57                continue
58
59            expr = self.get_expr(False)
60            if 'include' in expr:
61                self.reject_expr_doc(cur_doc)
62                if len(expr) != 1:
63                    raise QAPISemError(info, "invalid 'include' directive")
64                include = expr['include']
65                if not isinstance(include, str):
66                    raise QAPISemError(info,
67                                       "value of 'include' must be a string")
68                incl_fname = os.path.join(os.path.dirname(fname),
69                                          include)
70                self.exprs.append({'expr': {'include': incl_fname},
71                                   'info': info})
72                exprs_include = self._include(include, info, incl_fname,
73                                              previously_included)
74                if exprs_include:
75                    self.exprs.extend(exprs_include.exprs)
76                    self.docs.extend(exprs_include.docs)
77            elif "pragma" in expr:
78                self.reject_expr_doc(cur_doc)
79                if len(expr) != 1:
80                    raise QAPISemError(info, "invalid 'pragma' directive")
81                pragma = expr['pragma']
82                if not isinstance(pragma, dict):
83                    raise QAPISemError(
84                        info, "value of 'pragma' must be an object")
85                for name, value in pragma.items():
86                    self._pragma(name, value, info)
87            else:
88                expr_elem = {'expr': expr,
89                             'info': info}
90                if cur_doc:
91                    if not cur_doc.symbol:
92                        raise QAPISemError(
93                            cur_doc.info, "definition documentation required")
94                    expr_elem['doc'] = cur_doc
95                self.exprs.append(expr_elem)
96            cur_doc = None
97        self.reject_expr_doc(cur_doc)
98
99    @staticmethod
100    def reject_expr_doc(doc):
101        if doc and doc.symbol:
102            raise QAPISemError(
103                doc.info,
104                "documentation for '%s' is not followed by the definition"
105                % doc.symbol)
106
107    def _include(self, include, info, incl_fname, previously_included):
108        incl_abs_fname = os.path.abspath(incl_fname)
109        # catch inclusion cycle
110        inf = info
111        while inf:
112            if incl_abs_fname == os.path.abspath(inf.fname):
113                raise QAPISemError(info, "inclusion loop for %s" % include)
114            inf = inf.parent
115
116        # skip multiple include of the same file
117        if incl_abs_fname in previously_included:
118            return None
119
120        return QAPISchemaParser(incl_fname, previously_included, info)
121
122    def _pragma(self, name, value, info):
123        if name == 'doc-required':
124            if not isinstance(value, bool):
125                raise QAPISemError(info,
126                                   "pragma 'doc-required' must be boolean")
127            info.pragma.doc_required = value
128        elif name == 'returns-whitelist':
129            if (not isinstance(value, list)
130                    or any([not isinstance(elt, str) for elt in value])):
131                raise QAPISemError(
132                    info,
133                    "pragma returns-whitelist must be a list of strings")
134            info.pragma.returns_whitelist = value
135        elif name == 'name-case-whitelist':
136            if (not isinstance(value, list)
137                    or any([not isinstance(elt, str) for elt in value])):
138                raise QAPISemError(
139                    info,
140                    "pragma name-case-whitelist must be a list of strings")
141            info.pragma.name_case_whitelist = value
142        else:
143            raise QAPISemError(info, "unknown pragma '%s'" % name)
144
145    def accept(self, skip_comment=True):
146        while True:
147            self.tok = self.src[self.cursor]
148            self.pos = self.cursor
149            self.cursor += 1
150            self.val = None
151
152            if self.tok == '#':
153                if self.src[self.cursor] == '#':
154                    # Start of doc comment
155                    skip_comment = False
156                self.cursor = self.src.find('\n', self.cursor)
157                if not skip_comment:
158                    self.val = self.src[self.pos:self.cursor]
159                    return
160            elif self.tok in '{}:,[]':
161                return
162            elif self.tok == "'":
163                # Note: we accept only printable ASCII
164                string = ''
165                esc = False
166                while True:
167                    ch = self.src[self.cursor]
168                    self.cursor += 1
169                    if ch == '\n':
170                        raise QAPIParseError(self, "missing terminating \"'\"")
171                    if esc:
172                        # Note: we recognize only \\ because we have
173                        # no use for funny characters in strings
174                        if ch != '\\':
175                            raise QAPIParseError(self,
176                                                 "unknown escape \\%s" % ch)
177                        esc = False
178                    elif ch == '\\':
179                        esc = True
180                        continue
181                    elif ch == "'":
182                        self.val = string
183                        return
184                    if ord(ch) < 32 or ord(ch) >= 127:
185                        raise QAPIParseError(
186                            self, "funny character in string")
187                    string += ch
188            elif self.src.startswith('true', self.pos):
189                self.val = True
190                self.cursor += 3
191                return
192            elif self.src.startswith('false', self.pos):
193                self.val = False
194                self.cursor += 4
195                return
196            elif self.tok == '\n':
197                if self.cursor == len(self.src):
198                    self.tok = None
199                    return
200                self.info = self.info.next_line()
201                self.line_pos = self.cursor
202            elif not self.tok.isspace():
203                # Show up to next structural, whitespace or quote
204                # character
205                match = re.match('[^[\\]{}:,\\s\'"]+',
206                                 self.src[self.cursor-1:])
207                raise QAPIParseError(self, "stray '%s'" % match.group(0))
208
209    def get_members(self):
210        expr = OrderedDict()
211        if self.tok == '}':
212            self.accept()
213            return expr
214        if self.tok != "'":
215            raise QAPIParseError(self, "expected string or '}'")
216        while True:
217            key = self.val
218            self.accept()
219            if self.tok != ':':
220                raise QAPIParseError(self, "expected ':'")
221            self.accept()
222            if key in expr:
223                raise QAPIParseError(self, "duplicate key '%s'" % key)
224            expr[key] = self.get_expr(True)
225            if self.tok == '}':
226                self.accept()
227                return expr
228            if self.tok != ',':
229                raise QAPIParseError(self, "expected ',' or '}'")
230            self.accept()
231            if self.tok != "'":
232                raise QAPIParseError(self, "expected string")
233
234    def get_values(self):
235        expr = []
236        if self.tok == ']':
237            self.accept()
238            return expr
239        if self.tok not in "{['tf":
240            raise QAPIParseError(
241                self, "expected '{', '[', ']', string, or boolean")
242        while True:
243            expr.append(self.get_expr(True))
244            if self.tok == ']':
245                self.accept()
246                return expr
247            if self.tok != ',':
248                raise QAPIParseError(self, "expected ',' or ']'")
249            self.accept()
250
251    def get_expr(self, nested):
252        if self.tok != '{' and not nested:
253            raise QAPIParseError(self, "expected '{'")
254        if self.tok == '{':
255            self.accept()
256            expr = self.get_members()
257        elif self.tok == '[':
258            self.accept()
259            expr = self.get_values()
260        elif self.tok in "'tf":
261            expr = self.val
262            self.accept()
263        else:
264            raise QAPIParseError(
265                self, "expected '{', '[', string, or boolean")
266        return expr
267
268    def get_doc(self, info):
269        if self.val != '##':
270            raise QAPIParseError(
271                self, "junk after '##' at start of documentation comment")
272
273        docs = []
274        cur_doc = QAPIDoc(self, info)
275        self.accept(False)
276        while self.tok == '#':
277            if self.val.startswith('##'):
278                # End of doc comment
279                if self.val != '##':
280                    raise QAPIParseError(
281                        self,
282                        "junk after '##' at end of documentation comment")
283                cur_doc.end_comment()
284                docs.append(cur_doc)
285                self.accept()
286                return docs
287            if self.val.startswith('# ='):
288                if cur_doc.symbol:
289                    raise QAPIParseError(
290                        self,
291                        "unexpected '=' markup in definition documentation")
292                if cur_doc.body.text:
293                    cur_doc.end_comment()
294                    docs.append(cur_doc)
295                    cur_doc = QAPIDoc(self, info)
296            cur_doc.append(self.val)
297            self.accept(False)
298
299        raise QAPIParseError(self, "documentation comment must end with '##'")
300
301
302class QAPIDoc:
303    """
304    A documentation comment block, either definition or free-form
305
306    Definition documentation blocks consist of
307
308    * a body section: one line naming the definition, followed by an
309      overview (any number of lines)
310
311    * argument sections: a description of each argument (for commands
312      and events) or member (for structs, unions and alternates)
313
314    * features sections: a description of each feature flag
315
316    * additional (non-argument) sections, possibly tagged
317
318    Free-form documentation blocks consist only of a body section.
319    """
320
321    class Section:
322        def __init__(self, parser, name=None, indent=0):
323            # parser, for error messages about indentation
324            self._parser = parser
325            # optional section name (argument/member or section name)
326            self.name = name
327            self.text = ''
328            # the expected indent level of the text of this section
329            self._indent = indent
330
331        def append(self, line):
332            # Strip leading spaces corresponding to the expected indent level
333            # Blank lines are always OK.
334            if line:
335                indent = re.match(r'\s*', line).end()
336                if indent < self._indent:
337                    raise QAPIParseError(
338                        self._parser,
339                        "unexpected de-indent (expected at least %d spaces)" %
340                        self._indent)
341                line = line[self._indent:]
342
343            self.text += line.rstrip() + '\n'
344
345    class ArgSection(Section):
346        def __init__(self, parser, name, indent=0):
347            super().__init__(parser, name, indent)
348            self.member = None
349
350        def connect(self, member):
351            self.member = member
352
353    def __init__(self, parser, info):
354        # self._parser is used to report errors with QAPIParseError.  The
355        # resulting error position depends on the state of the parser.
356        # It happens to be the beginning of the comment.  More or less
357        # servicable, but action at a distance.
358        self._parser = parser
359        self.info = info
360        self.symbol = None
361        self.body = QAPIDoc.Section(parser)
362        # dict mapping parameter name to ArgSection
363        self.args = OrderedDict()
364        self.features = OrderedDict()
365        # a list of Section
366        self.sections = []
367        # the current section
368        self._section = self.body
369        self._append_line = self._append_body_line
370
371    def has_section(self, name):
372        """Return True if we have a section with this name."""
373        for i in self.sections:
374            if i.name == name:
375                return True
376        return False
377
378    def append(self, line):
379        """
380        Parse a comment line and add it to the documentation.
381
382        The way that the line is dealt with depends on which part of
383        the documentation we're parsing right now:
384        * The body section: ._append_line is ._append_body_line
385        * An argument section: ._append_line is ._append_args_line
386        * A features section: ._append_line is ._append_features_line
387        * An additional section: ._append_line is ._append_various_line
388        """
389        line = line[1:]
390        if not line:
391            self._append_freeform(line)
392            return
393
394        if line[0] != ' ':
395            raise QAPIParseError(self._parser, "missing space after #")
396        line = line[1:]
397        self._append_line(line)
398
    def end_comment(self):
        """Finish the documentation block by ending the current section."""
        self._end_section()
401
402    @staticmethod
403    def _is_section_tag(name):
404        return name in ('Returns:', 'Since:',
405                        # those are often singular or plural
406                        'Note:', 'Notes:',
407                        'Example:', 'Examples:',
408                        'TODO:')
409
410    def _append_body_line(self, line):
411        """
412        Process a line of documentation text in the body section.
413
414        If this a symbol line and it is the section's first line, this
415        is a definition documentation block for that symbol.
416
417        If it's a definition documentation block, another symbol line
418        begins the argument section for the argument named by it, and
419        a section tag begins an additional section.  Start that
420        section and append the line to it.
421
422        Else, append the line to the current section.
423        """
424        name = line.split(' ', 1)[0]
425        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
426        # recognized, and get silently treated as ordinary text
427        if not self.symbol and not self.body.text and line.startswith('@'):
428            if not line.endswith(':'):
429                raise QAPIParseError(self._parser, "line should end with ':'")
430            self.symbol = line[1:-1]
431            # FIXME invalid names other than the empty string aren't flagged
432            if not self.symbol:
433                raise QAPIParseError(self._parser, "invalid name")
434        elif self.symbol:
435            # This is a definition documentation block
436            if name.startswith('@') and name.endswith(':'):
437                self._append_line = self._append_args_line
438                self._append_args_line(line)
439            elif line == 'Features:':
440                self._append_line = self._append_features_line
441            elif self._is_section_tag(name):
442                self._append_line = self._append_various_line
443                self._append_various_line(line)
444            else:
445                self._append_freeform(line)
446        else:
447            # This is a free-form documentation block
448            self._append_freeform(line)
449
    def _append_args_line(self, line):
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section.

        A section tag begins an additional section.  So does a
        non-indented line after a blank line, except for 'Features:',
        which begins the features section.  Switch to the matching
        line handler, and let it process the line where there is
        something to process.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_args_section(name[1:-1], indent)
            # Fall through to append the rest of the line, if any
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            # Non-indented line after a blank line
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)
495
    def _append_features_line(self, line):
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature's section.

        A section tag, or a non-indented line after a blank line,
        begins an additional section.  Switch to the line handler for
        additional sections, and let it process the line.

        Else, append the line to the current section.
        """
        name = line.split(' ', 1)[0]

        if name.startswith('@') and name.endswith(':'):
            # If line is "@arg:   first line of description", find
            # the index of 'f', which is the indent we expect for any
            # following lines.  We then remove the leading "@arg:"
            # from line and replace it with spaces so that 'f' has the
            # same index as it did in the original line and can be
            # handled the same way we will handle following lines.
            indent = re.match(r'@\S*:\s*', line).end()
            line = line[indent:]
            if not line:
                # Line was just the "@arg:" header; following lines
                # are not indented
                indent = 0
            else:
                line = ' ' * indent + line
            self._start_features_section(name[1:-1], indent)
            # Fall through to append the rest of the line, if any
        elif self._is_section_tag(name):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return
        elif (self._section.text.endswith('\n\n')
              and line and not line[0].isspace()):
            # Non-indented line after a blank line
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)
527
528    def _append_various_line(self, line):
529        """
530        Process a line of documentation text in an additional section.
531
532        A symbol line is an error.
533
534        A section tag begins an additional section.  Start that
535        section and append the line to it.
536
537        Else, append the line to the current section.
538        """
539        name = line.split(' ', 1)[0]
540
541        if name.startswith('@') and name.endswith(':'):
542            raise QAPIParseError(self._parser,
543                                 "'%s' can't follow '%s' section"
544                                 % (name, self.sections[0].name))
545        if self._is_section_tag(name):
546            # If line is "Section:   first line of description", find
547            # the index of 'f', which is the indent we expect for any
548            # following lines.  We then remove the leading "Section:"
549            # from line and replace it with spaces so that 'f' has the
550            # same index as it did in the original line and can be
551            # handled the same way we will handle following lines.
552            indent = re.match(r'\S*:\s*', line).end()
553            line = line[indent:]
554            if not line:
555                # Line was just the "Section:" header; following lines
556                # are not indented
557                indent = 0
558            else:
559                line = ' ' * indent + line
560            self._start_section(name[:-1], indent)
561
562        self._append_freeform(line)
563
564    def _start_symbol_section(self, symbols_dict, name, indent):
565        # FIXME invalid names other than the empty string aren't flagged
566        if not name:
567            raise QAPIParseError(self._parser, "invalid parameter name")
568        if name in symbols_dict:
569            raise QAPIParseError(self._parser,
570                                 "'%s' parameter name duplicated" % name)
571        assert not self.sections
572        self._end_section()
573        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
574        symbols_dict[name] = self._section
575
    def _start_args_section(self, name, indent):
        """Start the section for argument @name, recorded in self.args."""
        self._start_symbol_section(self.args, name, indent)
578
    def _start_features_section(self, name, indent):
        """Start the section for feature @name, recorded in self.features."""
        self._start_symbol_section(self.features, name, indent)
581
582    def _start_section(self, name=None, indent=0):
583        if name in ('Returns', 'Since') and self.has_section(name):
584            raise QAPIParseError(self._parser,
585                                 "duplicated '%s' section" % name)
586        self._end_section()
587        self._section = QAPIDoc.Section(self._parser, name, indent)
588        self.sections.append(self._section)
589
590    def _end_section(self):
591        if self._section:
592            text = self._section.text = self._section.text.strip()
593            if self._section.name and (not text or text.isspace()):
594                raise QAPIParseError(
595                    self._parser,
596                    "empty doc section '%s'" % self._section.name)
597            self._section = None
598
599    def _append_freeform(self, line):
600        match = re.match(r'(@\S+:)', line)
601        if match:
602            raise QAPIParseError(self._parser,
603                                 "'%s' not allowed in free-form documentation"
604                                 % match.group(1))
605        self._section.append(line)
606
607    def connect_member(self, member):
608        if member.name not in self.args:
609            # Undocumented TODO outlaw
610            self.args[member.name] = QAPIDoc.ArgSection(self._parser,
611                                                        member.name)
612        self.args[member.name].connect(member)
613
614    def connect_feature(self, feature):
615        if feature.name not in self.features:
616            raise QAPISemError(feature.info,
617                               "feature '%s' lacks documentation"
618                               % feature.name)
619        self.features[feature.name].connect(feature)
620
621    def check_expr(self, expr):
622        if self.has_section('Returns') and 'command' not in expr:
623            raise QAPISemError(self.info,
624                               "'Returns:' is only valid for commands")
625
626    def check(self):
627
628        def check_args_section(args, info, what):
629            bogus = [name for name, section in args.items()
630                     if not section.member]
631            if bogus:
632                raise QAPISemError(
633                    self.info,
634                    "documented member%s '%s' %s not exist"
635                    % ("s" if len(bogus) > 1 else "",
636                       "', '".join(bogus),
637                       "do" if len(bogus) > 1 else "does"))
638
639        check_args_section(self.args, self.info, 'members')
640        check_args_section(self.features, self.info, 'features')
641