xref: /qemu/scripts/decodetree.py (revision 78f8d497)
1#!/usr/bin/env python3
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20# See the syntax and semantics in docs/devel/decodetree.rst.
21#
22
23import os
24import re
25import sys
26import getopt
27
28insnwidth = 32
29insnmask = 0xffffffff
30variablewidth = False
31fields = {}
32arguments = {}
33formats = {}
34patterns = []
35allpatterns = []
36anyextern = False
37
38translate_prefix = 'trans'
39translate_scope = 'static '
40input_file = ''
41output_file = None
42output_fd = None
43insntype = 'uint32_t'
44decode_function = 'decode'
45
46re_ident = '[a-zA-Z][a-zA-Z0-9_]*'
47
48
49def error_with_file(file, lineno, *args):
50    """Print an error message from file:line and args and exit."""
51    global output_file
52    global output_fd
53
54    if lineno:
55        r = '{0}:{1}: error:'.format(file, lineno)
56    elif input_file:
57        r = '{0}: error:'.format(file)
58    else:
59        r = 'error:'
60    for a in args:
61        r += ' ' + str(a)
62    r += '\n'
63    sys.stderr.write(r)
64    if output_file and output_fd:
65        output_fd.close()
66        os.remove(output_file)
67    exit(1)
68
69def error(lineno, *args):
70    error_with_file(input_file, lineno, args)
71
72def output(*args):
73    global output_fd
74    for a in args:
75        output_fd.write(a)
76
77
78def output_autogen():
79    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
80
81
82def str_indent(c):
83    """Return a string with C spaces"""
84    return ' ' * c
85
86
87def str_fields(fields):
88    """Return a string uniquely identifing FIELDS"""
89    r = ''
90    for n in sorted(fields.keys()):
91        r += '_' + n
92    return r[1:]
93
94
95def str_match_bits(bits, mask):
96    """Return a string pretty-printing BITS/MASK"""
97    global insnwidth
98
99    i = 1 << (insnwidth - 1)
100    space = 0x01010100
101    r = ''
102    while i != 0:
103        if i & mask:
104            if i & bits:
105                r += '1'
106            else:
107                r += '0'
108        else:
109            r += '.'
110        if i & space:
111            r += ' '
112        i >>= 1
113    return r
114
115
116def is_pow2(x):
117    """Return true iff X is equal to a power of 2."""
118    return (x & (x - 1)) == 0
119
120
121def ctz(x):
122    """Return the number of times 2 factors into X."""
123    r = 0
124    while ((x >> r) & 1) == 0:
125        r += 1
126    return r
127
128
129def is_contiguous(bits):
130    shift = ctz(bits)
131    if is_pow2((bits >> shift) + 1):
132        return shift
133    else:
134        return -1
135
136
137def eq_fields_for_args(flds_a, flds_b):
138    if len(flds_a) != len(flds_b):
139        return False
140    for k, a in flds_a.items():
141        if k not in flds_b:
142            return False
143    return True
144
145
146def eq_fields_for_fmts(flds_a, flds_b):
147    if len(flds_a) != len(flds_b):
148        return False
149    for k, a in flds_a.items():
150        if k not in flds_b:
151            return False
152        b = flds_b[k]
153        if a.__class__ != b.__class__ or a != b:
154            return False
155    return True
156
157
158class Field:
159    """Class representing a simple instruction field"""
160    def __init__(self, sign, pos, len):
161        self.sign = sign
162        self.pos = pos
163        self.len = len
164        self.mask = ((1 << len) - 1) << pos
165
166    def __str__(self):
167        if self.sign:
168            s = 's'
169        else:
170            s = ''
171        return str(self.pos) + ':' + s + str(self.len)
172
173    def str_extract(self):
174        if self.sign:
175            extr = 'sextract32'
176        else:
177            extr = 'extract32'
178        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
179
180    def __eq__(self, other):
181        return self.sign == other.sign and self.mask == other.mask
182
183    def __ne__(self, other):
184        return not self.__eq__(other)
185# end Field
186
187
188class MultiField:
189    """Class representing a compound instruction field"""
190    def __init__(self, subs, mask):
191        self.subs = subs
192        self.sign = subs[0].sign
193        self.mask = mask
194
195    def __str__(self):
196        return str(self.subs)
197
198    def str_extract(self):
199        ret = '0'
200        pos = 0
201        for f in reversed(self.subs):
202            if pos == 0:
203                ret = f.str_extract()
204            else:
205                ret = 'deposit32({0}, {1}, {2}, {3})' \
206                      .format(ret, pos, 32 - pos, f.str_extract())
207            pos += f.len
208        return ret
209
210    def __ne__(self, other):
211        if len(self.subs) != len(other.subs):
212            return True
213        for a, b in zip(self.subs, other.subs):
214            if a.__class__ != b.__class__ or a != b:
215                return True
216        return False
217
218    def __eq__(self, other):
219        return not self.__ne__(other)
220# end MultiField
221
222
223class ConstField:
224    """Class representing an argument field with constant value"""
225    def __init__(self, value):
226        self.value = value
227        self.mask = 0
228        self.sign = value < 0
229
230    def __str__(self):
231        return str(self.value)
232
233    def str_extract(self):
234        return str(self.value)
235
236    def __cmp__(self, other):
237        return self.value - other.value
238# end ConstField
239
240
241class FunctionField:
242    """Class representing a field passed through a function"""
243    def __init__(self, func, base):
244        self.mask = base.mask
245        self.sign = base.sign
246        self.base = base
247        self.func = func
248
249    def __str__(self):
250        return self.func + '(' + str(self.base) + ')'
251
252    def str_extract(self):
253        return self.func + '(ctx, ' + self.base.str_extract() + ')'
254
255    def __eq__(self, other):
256        return self.func == other.func and self.base == other.base
257
258    def __ne__(self, other):
259        return not self.__eq__(other)
260# end FunctionField
261
262
263class ParameterField:
264    """Class representing a pseudo-field read from a function"""
265    def __init__(self, func):
266        self.mask = 0
267        self.sign = 0
268        self.func = func
269
270    def __str__(self):
271        return self.func
272
273    def str_extract(self):
274        return self.func + '(ctx)'
275
276    def __eq__(self, other):
277        return self.func == other.func
278
279    def __ne__(self, other):
280        return not self.__eq__(other)
281# end ParameterField
282
283
284class Arguments:
285    """Class representing the extracted fields of a format"""
286    def __init__(self, nm, flds, extern):
287        self.name = nm
288        self.extern = extern
289        self.fields = sorted(flds)
290
291    def __str__(self):
292        return self.name + ' ' + str(self.fields)
293
294    def struct_name(self):
295        return 'arg_' + self.name
296
297    def output_def(self):
298        if not self.extern:
299            output('typedef struct {\n')
300            for n in self.fields:
301                output('    int ', n, ';\n')
302            output('} ', self.struct_name(), ';\n\n')
303# end Arguments
304
305
306class General:
307    """Common code between instruction formats and instruction patterns"""
308    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
309        self.name = name
310        self.file = input_file
311        self.lineno = lineno
312        self.base = base
313        self.fixedbits = fixb
314        self.fixedmask = fixm
315        self.undefmask = udfm
316        self.fieldmask = fldm
317        self.fields = flds
318        self.width = w
319
320    def __str__(self):
321        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
322
323    def str1(self, i):
324        return str_indent(i) + self.__str__()
325# end General
326
327
328class Format(General):
329    """Class representing an instruction format"""
330
331    def extract_name(self):
332        global decode_function
333        return decode_function + '_extract_' + self.name
334
335    def output_extract(self):
336        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
337               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
338        for n, f in self.fields.items():
339            output('    a->', n, ' = ', f.str_extract(), ';\n')
340        output('}\n\n')
341# end Format
342
343
344class Pattern(General):
345    """Class representing an instruction pattern"""
346
347    def output_decl(self):
348        global translate_scope
349        global translate_prefix
350        output('typedef ', self.base.base.struct_name(),
351               ' arg_', self.name, ';\n')
352        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
353               '(DisasContext *ctx, arg_', self.name, ' *a);\n')
354
355    def output_code(self, i, extracted, outerbits, outermask):
356        global translate_prefix
357        ind = str_indent(i)
358        arg = self.base.base.name
359        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
360        if not extracted:
361            output(ind, self.base.extract_name(),
362                   '(ctx, &u.f_', arg, ', insn);\n')
363        for n, f in self.fields.items():
364            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
365        output(ind, 'if (', translate_prefix, '_', self.name,
366               '(ctx, &u.f_', arg, ')) return true;\n')
367# end Pattern
368
369
370class MultiPattern(General):
371    """Class representing an overlapping set of instruction patterns"""
372
373    def __init__(self, lineno, pats, fixb, fixm, udfm, w):
374        self.file = input_file
375        self.lineno = lineno
376        self.pats = pats
377        self.base = None
378        self.fixedbits = fixb
379        self.fixedmask = fixm
380        self.undefmask = udfm
381        self.width = w
382
383    def __str__(self):
384        r = "{"
385        for p in self.pats:
386           r = r + ' ' + str(p)
387        return r + "}"
388
389    def output_decl(self):
390        for p in self.pats:
391            p.output_decl()
392
393    def output_code(self, i, extracted, outerbits, outermask):
394        global translate_prefix
395        ind = str_indent(i)
396        for p in self.pats:
397            if outermask != p.fixedmask:
398                innermask = p.fixedmask & ~outermask
399                innerbits = p.fixedbits & ~outermask
400                output(ind, 'if ((insn & ',
401                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
402                       ') {\n')
403                output(ind, '    /* ',
404                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
405                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
406                output(ind, '}\n')
407            else:
408                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
409#end MultiPattern
410
411
412def parse_field(lineno, name, toks):
413    """Parse one instruction field from TOKS at LINENO"""
414    global fields
415    global re_ident
416    global insnwidth
417
418    # A "simple" field will have only one entry;
419    # a "multifield" will have several.
420    subs = []
421    width = 0
422    func = None
423    for t in toks:
424        if re.fullmatch('!function=' + re_ident, t):
425            if func:
426                error(lineno, 'duplicate function')
427            func = t.split('=')
428            func = func[1]
429            continue
430
431        if re.fullmatch('[0-9]+:s[0-9]+', t):
432            # Signed field extract
433            subtoks = t.split(':s')
434            sign = True
435        elif re.fullmatch('[0-9]+:[0-9]+', t):
436            # Unsigned field extract
437            subtoks = t.split(':')
438            sign = False
439        else:
440            error(lineno, 'invalid field token "{0}"'.format(t))
441        po = int(subtoks[0])
442        le = int(subtoks[1])
443        if po + le > insnwidth:
444            error(lineno, 'field {0} too large'.format(t))
445        f = Field(sign, po, le)
446        subs.append(f)
447        width += le
448
449    if width > insnwidth:
450        error(lineno, 'field too large')
451    if len(subs) == 0:
452        if func:
453            f = ParameterField(func)
454        else:
455            error(lineno, 'field with no value')
456    else:
457        if len(subs) == 1:
458            f = subs[0]
459        else:
460            mask = 0
461            for s in subs:
462                if mask & s.mask:
463                    error(lineno, 'field components overlap')
464                mask |= s.mask
465            f = MultiField(subs, mask)
466        if func:
467            f = FunctionField(func, f)
468
469    if name in fields:
470        error(lineno, 'duplicate field', name)
471    fields[name] = f
472# end parse_field
473
474
475def parse_arguments(lineno, name, toks):
476    """Parse one argument set from TOKS at LINENO"""
477    global arguments
478    global re_ident
479    global anyextern
480
481    flds = []
482    extern = False
483    for t in toks:
484        if re.fullmatch('!extern', t):
485            extern = True
486            anyextern = True
487            continue
488        if not re.fullmatch(re_ident, t):
489            error(lineno, 'invalid argument set token "{0}"'.format(t))
490        if t in flds:
491            error(lineno, 'duplicate argument "{0}"'.format(t))
492        flds.append(t)
493
494    if name in arguments:
495        error(lineno, 'duplicate argument set', name)
496    arguments[name] = Arguments(name, flds, extern)
497# end parse_arguments
498
499
500def lookup_field(lineno, name):
501    global fields
502    if name in fields:
503        return fields[name]
504    error(lineno, 'undefined field', name)
505
506
507def add_field(lineno, flds, new_name, f):
508    if new_name in flds:
509        error(lineno, 'duplicate field', new_name)
510    flds[new_name] = f
511    return flds
512
513
514def add_field_byname(lineno, flds, new_name, old_name):
515    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
516
517
518def infer_argument_set(flds):
519    global arguments
520    global decode_function
521
522    for arg in arguments.values():
523        if eq_fields_for_args(flds, arg.fields):
524            return arg
525
526    name = decode_function + str(len(arguments))
527    arg = Arguments(name, flds.keys(), False)
528    arguments[name] = arg
529    return arg
530
531
532def infer_format(arg, fieldmask, flds, width):
533    global arguments
534    global formats
535    global decode_function
536
537    const_flds = {}
538    var_flds = {}
539    for n, c in flds.items():
540        if c is ConstField:
541            const_flds[n] = c
542        else:
543            var_flds[n] = c
544
545    # Look for an existing format with the same argument set and fields
546    for fmt in formats.values():
547        if arg and fmt.base != arg:
548            continue
549        if fieldmask != fmt.fieldmask:
550            continue
551        if width != fmt.width:
552            continue
553        if not eq_fields_for_fmts(flds, fmt.fields):
554            continue
555        return (fmt, const_flds)
556
557    name = decode_function + '_Fmt_' + str(len(formats))
558    if not arg:
559        arg = infer_argument_set(flds)
560
561    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
562    formats[name] = fmt
563
564    return (fmt, const_flds)
565# end infer_format
566
567
568def parse_generic(lineno, is_format, name, toks):
569    """Parse one instruction format from TOKS at LINENO"""
570    global fields
571    global arguments
572    global formats
573    global patterns
574    global allpatterns
575    global re_ident
576    global insnwidth
577    global insnmask
578    global variablewidth
579
580    fixedmask = 0
581    fixedbits = 0
582    undefmask = 0
583    width = 0
584    flds = {}
585    arg = None
586    fmt = None
587    for t in toks:
588        # '&Foo' gives a format an explcit argument set.
589        if t[0] == '&':
590            tt = t[1:]
591            if arg:
592                error(lineno, 'multiple argument sets')
593            if tt in arguments:
594                arg = arguments[tt]
595            else:
596                error(lineno, 'undefined argument set', t)
597            continue
598
599        # '@Foo' gives a pattern an explicit format.
600        if t[0] == '@':
601            tt = t[1:]
602            if fmt:
603                error(lineno, 'multiple formats')
604            if tt in formats:
605                fmt = formats[tt]
606            else:
607                error(lineno, 'undefined format', t)
608            continue
609
610        # '%Foo' imports a field.
611        if t[0] == '%':
612            tt = t[1:]
613            flds = add_field_byname(lineno, flds, tt, tt)
614            continue
615
616        # 'Foo=%Bar' imports a field with a different name.
617        if re.fullmatch(re_ident + '=%' + re_ident, t):
618            (fname, iname) = t.split('=%')
619            flds = add_field_byname(lineno, flds, fname, iname)
620            continue
621
622        # 'Foo=number' sets an argument field to a constant value
623        if re.fullmatch(re_ident + '=[+-]?[0-9]+', t):
624            (fname, value) = t.split('=')
625            value = int(value)
626            flds = add_field(lineno, flds, fname, ConstField(value))
627            continue
628
629        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
630        # required ones, or dont-cares.
631        if re.fullmatch('[01.-]+', t):
632            shift = len(t)
633            fms = t.replace('0', '1')
634            fms = fms.replace('.', '0')
635            fms = fms.replace('-', '0')
636            fbs = t.replace('.', '0')
637            fbs = fbs.replace('-', '0')
638            ubm = t.replace('1', '0')
639            ubm = ubm.replace('.', '0')
640            ubm = ubm.replace('-', '1')
641            fms = int(fms, 2)
642            fbs = int(fbs, 2)
643            ubm = int(ubm, 2)
644            fixedbits = (fixedbits << shift) | fbs
645            fixedmask = (fixedmask << shift) | fms
646            undefmask = (undefmask << shift) | ubm
647        # Otherwise, fieldname:fieldwidth
648        elif re.fullmatch(re_ident + ':s?[0-9]+', t):
649            (fname, flen) = t.split(':')
650            sign = False
651            if flen[0] == 's':
652                sign = True
653                flen = flen[1:]
654            shift = int(flen, 10)
655            if shift + width > insnwidth:
656                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
657            f = Field(sign, insnwidth - width - shift, shift)
658            flds = add_field(lineno, flds, fname, f)
659            fixedbits <<= shift
660            fixedmask <<= shift
661            undefmask <<= shift
662        else:
663            error(lineno, 'invalid token "{0}"'.format(t))
664        width += shift
665
666    if variablewidth and width < insnwidth and width % 8 == 0:
667        shift = insnwidth - width
668        fixedbits <<= shift
669        fixedmask <<= shift
670        undefmask <<= shift
671        undefmask |= (1 << shift) - 1
672
673    # We should have filled in all of the bits of the instruction.
674    elif not (is_format and width == 0) and width != insnwidth:
675        error(lineno, 'definition has {0} bits'.format(width))
676
677    # Do not check for fields overlaping fields; one valid usage
678    # is to be able to duplicate fields via import.
679    fieldmask = 0
680    for f in flds.values():
681        fieldmask |= f.mask
682
683    # Fix up what we've parsed to match either a format or a pattern.
684    if is_format:
685        # Formats cannot reference formats.
686        if fmt:
687            error(lineno, 'format referencing format')
688        # If an argument set is given, then there should be no fields
689        # without a place to store it.
690        if arg:
691            for f in flds.keys():
692                if f not in arg.fields:
693                    error(lineno, 'field {0} not in argument set {1}'
694                                  .format(f, arg.name))
695        else:
696            arg = infer_argument_set(flds)
697        if name in formats:
698            error(lineno, 'duplicate format name', name)
699        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
700                     undefmask, fieldmask, flds, width)
701        formats[name] = fmt
702    else:
703        # Patterns can reference a format ...
704        if fmt:
705            # ... but not an argument simultaneously
706            if arg:
707                error(lineno, 'pattern specifies both format and argument set')
708            if fixedmask & fmt.fixedmask:
709                error(lineno, 'pattern fixed bits overlap format fixed bits')
710            if width != fmt.width:
711                error(lineno, 'pattern uses format of different width')
712            fieldmask |= fmt.fieldmask
713            fixedbits |= fmt.fixedbits
714            fixedmask |= fmt.fixedmask
715            undefmask |= fmt.undefmask
716        else:
717            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
718        arg = fmt.base
719        for f in flds.keys():
720            if f not in arg.fields:
721                error(lineno, 'field {0} not in argument set {1}'
722                              .format(f, arg.name))
723            if f in fmt.fields.keys():
724                error(lineno, 'field {0} set by format and pattern'.format(f))
725        for f in arg.fields:
726            if f not in flds.keys() and f not in fmt.fields.keys():
727                error(lineno, 'field {0} not initialized'.format(f))
728        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
729                      undefmask, fieldmask, flds, width)
730        patterns.append(pat)
731        allpatterns.append(pat)
732
733    # Validate the masks that we have assembled.
734    if fieldmask & fixedmask:
735        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
736                      .format(fieldmask, fixedmask))
737    if fieldmask & undefmask:
738        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
739                      .format(fieldmask, undefmask))
740    if fixedmask & undefmask:
741        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
742                      .format(fixedmask, undefmask))
743    if not is_format:
744        allbits = fieldmask | fixedmask | undefmask
745        if allbits != insnmask:
746            error(lineno, 'bits left unspecified (0x{0:08x})'
747                          .format(allbits ^ insnmask))
748# end parse_general
749
750def build_multi_pattern(lineno, pats):
751    """Validate the Patterns going into a MultiPattern."""
752    global patterns
753    global insnmask
754
755    if len(pats) < 2:
756        error(lineno, 'less than two patterns within braces')
757
758    fixedmask = insnmask
759    undefmask = insnmask
760
761    # Collect fixed/undefmask for all of the children.
762    # Move the defining lineno back to that of the first child.
763    for p in pats:
764        fixedmask &= p.fixedmask
765        undefmask &= p.undefmask
766        if p.lineno < lineno:
767            lineno = p.lineno
768
769    width = None
770    for p in pats:
771        if width is None:
772            width = p.width
773        elif width != p.width:
774            error(lineno, 'width mismatch in patterns within braces')
775
776    repeat = True
777    while repeat:
778        if fixedmask == 0:
779            error(lineno, 'no overlap in patterns within braces')
780        fixedbits = None
781        for p in pats:
782            thisbits = p.fixedbits & fixedmask
783            if fixedbits is None:
784                fixedbits = thisbits
785            elif fixedbits != thisbits:
786                fixedmask &= ~(fixedbits ^ thisbits)
787                break
788        else:
789            repeat = False
790
791    mp = MultiPattern(lineno, pats, fixedbits, fixedmask, undefmask, width)
792    patterns.append(mp)
793# end build_multi_pattern
794
795def parse_file(f):
796    """Parse all of the patterns within a file"""
797
798    global patterns
799
800    # Read all of the lines of the file.  Concatenate lines
801    # ending in backslash; discard empty lines and comments.
802    toks = []
803    lineno = 0
804    nesting = 0
805    saved_pats = []
806
807    for line in f:
808        lineno += 1
809
810        # Expand and strip spaces, to find indent.
811        line = line.rstrip()
812        line = line.expandtabs()
813        len1 = len(line)
814        line = line.lstrip()
815        len2 = len(line)
816
817        # Discard comments
818        end = line.find('#')
819        if end >= 0:
820            line = line[:end]
821
822        t = line.split()
823        if len(toks) != 0:
824            # Next line after continuation
825            toks.extend(t)
826        else:
827            # Allow completely blank lines.
828            if len1 == 0:
829                continue
830            indent = len1 - len2
831            # Empty line due to comment.
832            if len(t) == 0:
833                # Indentation must be correct, even for comment lines.
834                if indent != nesting:
835                    error(lineno, 'indentation ', indent, ' != ', nesting)
836                continue
837            start_lineno = lineno
838            toks = t
839
840        # Continuation?
841        if toks[-1] == '\\':
842            toks.pop()
843            continue
844
845        name = toks[0]
846        del toks[0]
847
848        # End nesting?
849        if name == '}':
850            if nesting == 0:
851                error(start_lineno, 'mismatched close brace')
852            if len(toks) != 0:
853                error(start_lineno, 'extra tokens after close brace')
854            nesting -= 2
855            if indent != nesting:
856                error(start_lineno, 'indentation ', indent, ' != ', nesting)
857            pats = patterns
858            patterns = saved_pats.pop()
859            build_multi_pattern(lineno, pats)
860            toks = []
861            continue
862
863        # Everything else should have current indentation.
864        if indent != nesting:
865            error(start_lineno, 'indentation ', indent, ' != ', nesting)
866
867        # Start nesting?
868        if name == '{':
869            if len(toks) != 0:
870                error(start_lineno, 'extra tokens after open brace')
871            saved_pats.append(patterns)
872            patterns = []
873            nesting += 2
874            toks = []
875            continue
876
877        # Determine the type of object needing to be parsed.
878        if name[0] == '%':
879            parse_field(start_lineno, name[1:], toks)
880        elif name[0] == '&':
881            parse_arguments(start_lineno, name[1:], toks)
882        elif name[0] == '@':
883            parse_generic(start_lineno, True, name[1:], toks)
884        else:
885            parse_generic(start_lineno, False, name, toks)
886        toks = []
887# end parse_file
888
889
890class Tree:
891    """Class representing a node in a decode tree"""
892
893    def __init__(self, fm, tm):
894        self.fixedmask = fm
895        self.thismask = tm
896        self.subs = []
897        self.base = None
898
899    def str1(self, i):
900        ind = str_indent(i)
901        r = '{0}{1:08x}'.format(ind, self.fixedmask)
902        if self.format:
903            r += ' ' + self.format.name
904        r += ' [\n'
905        for (b, s) in self.subs:
906            r += '{0}  {1:08x}:\n'.format(ind, b)
907            r += s.str1(i + 4) + '\n'
908        r += ind + ']'
909        return r
910
911    def __str__(self):
912        return self.str1(0)
913
914    def output_code(self, i, extracted, outerbits, outermask):
915        ind = str_indent(i)
916
917        # If we identified all nodes below have the same format,
918        # extract the fields now.
919        if not extracted and self.base:
920            output(ind, self.base.extract_name(),
921                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
922            extracted = True
923
924        # Attempt to aid the compiler in producing compact switch statements.
925        # If the bits in the mask are contiguous, extract them.
926        sh = is_contiguous(self.thismask)
927        if sh > 0:
928            # Propagate SH down into the local functions.
929            def str_switch(b, sh=sh):
930                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
931
932            def str_case(b, sh=sh):
933                return '0x{0:x}'.format(b >> sh)
934        else:
935            def str_switch(b):
936                return 'insn & 0x{0:08x}'.format(b)
937
938            def str_case(b):
939                return '0x{0:08x}'.format(b)
940
941        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
942        for b, s in sorted(self.subs):
943            assert (self.thismask & ~s.fixedmask) == 0
944            innermask = outermask | self.thismask
945            innerbits = outerbits | b
946            output(ind, 'case ', str_case(b), ':\n')
947            output(ind, '    /* ',
948                   str_match_bits(innerbits, innermask), ' */\n')
949            s.output_code(i + 4, extracted, innerbits, innermask)
950            output(ind, '    return false;\n')
951        output(ind, '}\n')
952# end Tree
953
954
955def build_tree(pats, outerbits, outermask):
956    # Find the intersection of all remaining fixedmask.
957    innermask = ~outermask & insnmask
958    for i in pats:
959        innermask &= i.fixedmask
960
961    if innermask == 0:
962        text = 'overlapping patterns:'
963        for p in pats:
964            text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
965        error_with_file(pats[0].file, pats[0].lineno, text)
966
967    fullmask = outermask | innermask
968
969    # Sort each element of pats into the bin selected by the mask.
970    bins = {}
971    for i in pats:
972        fb = i.fixedbits & innermask
973        if fb in bins:
974            bins[fb].append(i)
975        else:
976            bins[fb] = [i]
977
978    # We must recurse if any bin has more than one element or if
979    # the single element in the bin has not been fully matched.
980    t = Tree(fullmask, innermask)
981
982    for b, l in bins.items():
983        s = l[0]
984        if len(l) > 1 or s.fixedmask & ~fullmask != 0:
985            s = build_tree(l, b | outerbits, fullmask)
986        t.subs.append((b, s))
987
988    return t
989# end build_tree
990
991
992class SizeTree:
993    """Class representing a node in a size decode tree"""
994
995    def __init__(self, m, w):
996        self.mask = m
997        self.subs = []
998        self.base = None
999        self.width = w
1000
1001    def str1(self, i):
1002        ind = str_indent(i)
1003        r = '{0}{1:08x}'.format(ind, self.mask)
1004        r += ' [\n'
1005        for (b, s) in self.subs:
1006            r += '{0}  {1:08x}:\n'.format(ind, b)
1007            r += s.str1(i + 4) + '\n'
1008        r += ind + ']'
1009        return r
1010
1011    def __str__(self):
1012        return self.str1(0)
1013
1014    def output_code(self, i, extracted, outerbits, outermask):
1015        ind = str_indent(i)
1016
1017        # If we need to load more bytes to test, do so now.
1018        if extracted < self.width:
1019            output(ind, 'insn = ', decode_function,
1020                   '_load_bytes(ctx, insn, {0}, {1});\n'
1021                   .format(extracted // 8, self.width // 8));
1022            extracted = self.width
1023
1024        # Attempt to aid the compiler in producing compact switch statements.
1025        # If the bits in the mask are contiguous, extract them.
1026        sh = is_contiguous(self.mask)
1027        if sh > 0:
1028            # Propagate SH down into the local functions.
1029            def str_switch(b, sh=sh):
1030                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
1031
1032            def str_case(b, sh=sh):
1033                return '0x{0:x}'.format(b >> sh)
1034        else:
1035            def str_switch(b):
1036                return 'insn & 0x{0:08x}'.format(b)
1037
1038            def str_case(b):
1039                return '0x{0:08x}'.format(b)
1040
1041        output(ind, 'switch (', str_switch(self.mask), ') {\n')
1042        for b, s in sorted(self.subs):
1043            innermask = outermask | self.mask
1044            innerbits = outerbits | b
1045            output(ind, 'case ', str_case(b), ':\n')
1046            output(ind, '    /* ',
1047                   str_match_bits(innerbits, innermask), ' */\n')
1048            s.output_code(i + 4, extracted, innerbits, innermask)
1049        output(ind, '}\n')
1050        output(ind, 'return insn;\n')
1051# end SizeTree
1052
1053class SizeLeaf:
1054    """Class representing a leaf node in a size decode tree"""
1055
1056    def __init__(self, m, w):
1057        self.mask = m
1058        self.width = w
1059
1060    def str1(self, i):
1061        ind = str_indent(i)
1062        return '{0}{1:08x}'.format(ind, self.mask)
1063
1064    def __str__(self):
1065        return self.str1(0)
1066
1067    def output_code(self, i, extracted, outerbits, outermask):
1068        global decode_function
1069        ind = str_indent(i)
1070
1071        # If we need to load more bytes, do so now.
1072        if extracted < self.width:
1073            output(ind, 'insn = ', decode_function,
1074                   '_load_bytes(ctx, insn, {0}, {1});\n'
1075                   .format(extracted // 8, self.width // 8));
1076            extracted = self.width
1077        output(ind, 'return insn;\n')
1078# end SizeLeaf
1079
1080
1081def build_size_tree(pats, width, outerbits, outermask):
1082    global insnwidth
1083
1084    # Collect the mask of bits that are fixed in this width
1085    innermask = 0xff << (insnwidth - width)
1086    innermask &= ~outermask
1087    minwidth = None
1088    onewidth = True
1089    for i in pats:
1090        innermask &= i.fixedmask
1091        if minwidth is None:
1092            minwidth = i.width
1093        elif minwidth != i.width:
1094            onewidth = False;
1095            if minwidth < i.width:
1096                minwidth = i.width
1097
1098    if onewidth:
1099        return SizeLeaf(innermask, minwidth)
1100
1101    if innermask == 0:
1102        if width < minwidth:
1103            return build_size_tree(pats, width + 8, outerbits, outermask)
1104
1105        pnames = []
1106        for p in pats:
1107            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
1108        error_with_file(pats[0].file, pats[0].lineno,
1109                        'overlapping patterns size {0}:'.format(width), pnames)
1110
1111    bins = {}
1112    for i in pats:
1113        fb = i.fixedbits & innermask
1114        if fb in bins:
1115            bins[fb].append(i)
1116        else:
1117            bins[fb] = [i]
1118
1119    fullmask = outermask | innermask
1120    lens = sorted(bins.keys())
1121    if len(lens) == 1:
1122        b = lens[0]
1123        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
1124
1125    r = SizeTree(innermask, width)
1126    for b, l in bins.items():
1127        s = build_size_tree(l, width, b | outerbits, fullmask)
1128        r.subs.append((b, s))
1129    return r
1130# end build_size_tree
1131
1132
1133def prop_format(tree):
1134    """Propagate Format objects into the decode tree"""
1135
1136    # Depth first search.
1137    for (b, s) in tree.subs:
1138        if isinstance(s, Tree):
1139            prop_format(s)
1140
1141    # If all entries in SUBS have the same format, then
1142    # propagate that into the tree.
1143    f = None
1144    for (b, s) in tree.subs:
1145        if f is None:
1146            f = s.base
1147            if f is None:
1148                return
1149        if f is not s.base:
1150            return
1151    tree.base = f
1152# end prop_format
1153
1154
1155def prop_size(tree):
1156    """Propagate minimum widths up the decode size tree"""
1157
1158    if isinstance(tree, SizeTree):
1159        min = None
1160        for (b, s) in tree.subs:
1161            width = prop_size(s)
1162            if min is None or min > width:
1163                min = width
1164        assert min >= tree.width
1165        tree.width = min
1166    else:
1167        min = tree.width
1168    return min
1169# end prop_size
1170
1171
1172def main():
1173    global arguments
1174    global formats
1175    global patterns
1176    global allpatterns
1177    global translate_scope
1178    global translate_prefix
1179    global output_fd
1180    global output_file
1181    global input_file
1182    global insnwidth
1183    global insntype
1184    global insnmask
1185    global decode_function
1186    global variablewidth
1187    global anyextern
1188
1189    decode_scope = 'static '
1190
1191    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
1192                 'static-decode=', 'varinsnwidth=']
1193    try:
1194        (opts, args) = getopt.getopt(sys.argv[1:], 'o:vw:', long_opts)
1195    except getopt.GetoptError as err:
1196        error(0, err)
1197    for o, a in opts:
1198        if o in ('-o', '--output'):
1199            output_file = a
1200        elif o == '--decode':
1201            decode_function = a
1202            decode_scope = ''
1203        elif o == '--static-decode':
1204            decode_function = a
1205        elif o == '--translate':
1206            translate_prefix = a
1207            translate_scope = ''
1208        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
1209            if o == '--varinsnwidth':
1210                variablewidth = True
1211            insnwidth = int(a)
1212            if insnwidth == 16:
1213                insntype = 'uint16_t'
1214                insnmask = 0xffff
1215            elif insnwidth != 32:
1216                error(0, 'cannot handle insns of width', insnwidth)
1217        else:
1218            assert False, 'unhandled option'
1219
1220    if len(args) < 1:
1221        error(0, 'missing input file')
1222    for filename in args:
1223        input_file = filename
1224        f = open(filename, 'r')
1225        parse_file(f)
1226        f.close()
1227
1228    if variablewidth:
1229        stree = build_size_tree(patterns, 8, 0, 0)
1230        prop_size(stree)
1231
1232    dtree = build_tree(patterns, 0, 0)
1233    prop_format(dtree)
1234
1235    if output_file:
1236        output_fd = open(output_file, 'w')
1237    else:
1238        output_fd = sys.stdout
1239
1240    output_autogen()
1241    for n in sorted(arguments.keys()):
1242        f = arguments[n]
1243        f.output_def()
1244
1245    # A single translate function can be invoked for different patterns.
1246    # Make sure that the argument sets are the same, and declare the
1247    # function only once.
1248    #
1249    # If we're sharing formats, we're likely also sharing trans_* functions,
1250    # but we can't tell which ones.  Prevent issues from the compiler by
1251    # suppressing redundant declaration warnings.
1252    if anyextern:
1253        output("#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE\n",
1254               "# pragma GCC diagnostic push\n",
1255               "# pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1256               "# ifdef __clang__\n"
1257               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1258               "# endif\n",
1259               "#endif\n\n")
1260
1261    out_pats = {}
1262    for i in allpatterns:
1263        if i.name in out_pats:
1264            p = out_pats[i.name]
1265            if i.base.base != p.base.base:
1266                error(0, i.name, ' has conflicting argument sets')
1267        else:
1268            i.output_decl()
1269            out_pats[i.name] = i
1270    output('\n')
1271
1272    if anyextern:
1273        output("#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE\n",
1274               "# pragma GCC diagnostic pop\n",
1275               "#endif\n\n")
1276
1277    for n in sorted(formats.keys()):
1278        f = formats[n]
1279        f.output_extract()
1280
1281    output(decode_scope, 'bool ', decode_function,
1282           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
1283
1284    i4 = str_indent(4)
1285
1286    if len(allpatterns) != 0:
1287        output(i4, 'union {\n')
1288        for n in sorted(arguments.keys()):
1289            f = arguments[n]
1290            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
1291        output(i4, '} u;\n\n')
1292        dtree.output_code(4, False, 0, 0)
1293
1294    output(i4, 'return false;\n')
1295    output('}\n')
1296
1297    if variablewidth:
1298        output('\n', decode_scope, insntype, ' ', decode_function,
1299               '_load(DisasContext *ctx)\n{\n',
1300               '    ', insntype, ' insn = 0;\n\n')
1301        stree.output_code(4, 0, 0, 0)
1302        output('}\n')
1303
1304    if output_file:
1305        output_fd.close()
1306# end main
1307
1308
1309if __name__ == '__main__':
1310    main()
1311