xref: /qemu/scripts/decodetree.py (revision d0fb9657)
1#!/usr/bin/env python3
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2.1 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20# See the syntax and semantics in docs/devel/decodetree.rst.
21#
22
23import io
24import os
25import re
26import sys
27import getopt
28
29insnwidth = 32
30bitop_width = 32
31insnmask = 0xffffffff
32variablewidth = False
33fields = {}
34arguments = {}
35formats = {}
36allpatterns = []
37anyextern = False
38
39translate_prefix = 'trans'
40translate_scope = 'static '
41input_file = ''
42output_file = None
43output_fd = None
44insntype = 'uint32_t'
45decode_function = 'decode'
46
47# An identifier for C.
48re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
49
50# Identifiers for Arguments, Fields, Formats and Patterns.
51re_arg_ident = '&[a-zA-Z0-9_]*'
52re_fld_ident = '%[a-zA-Z0-9_]*'
53re_fmt_ident = '@[a-zA-Z0-9_]*'
54re_pat_ident = '[a-zA-Z0-9_]*'
55
56def error_with_file(file, lineno, *args):
57    """Print an error message from file:line and args and exit."""
58    global output_file
59    global output_fd
60
61    prefix = ''
62    if file:
63        prefix += f'{file}:'
64    if lineno:
65        prefix += f'{lineno}:'
66    if prefix:
67        prefix += ' '
68    print(prefix, end='error: ', file=sys.stderr)
69    print(*args, file=sys.stderr)
70
71    if output_file and output_fd:
72        output_fd.close()
73        os.remove(output_file)
74    exit(1)
75# end error_with_file
76
77
78def error(lineno, *args):
79    error_with_file(input_file, lineno, *args)
80# end error
81
82
83def output(*args):
84    global output_fd
85    for a in args:
86        output_fd.write(a)
87
88
89def output_autogen():
90    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
91
92
93def str_indent(c):
94    """Return a string with C spaces"""
95    return ' ' * c
96
97
98def str_fields(fields):
99    """Return a string uniquely identifying FIELDS"""
100    r = ''
101    for n in sorted(fields.keys()):
102        r += '_' + n
103    return r[1:]
104
105
106def whex(val):
107    """Return a hex string for val padded for insnwidth"""
108    global insnwidth
109    return f'0x{val:0{insnwidth // 4}x}'
110
111
112def whexC(val):
113    """Return a hex string for val padded for insnwidth,
114       and with the proper suffix for a C constant."""
115    suffix = ''
116    if val >= 0x100000000:
117        suffix = 'ull'
118    elif val >= 0x80000000:
119        suffix = 'u'
120    return whex(val) + suffix
121
122
123def str_match_bits(bits, mask):
124    """Return a string pretty-printing BITS/MASK"""
125    global insnwidth
126
127    i = 1 << (insnwidth - 1)
128    space = 0x01010100
129    r = ''
130    while i != 0:
131        if i & mask:
132            if i & bits:
133                r += '1'
134            else:
135                r += '0'
136        else:
137            r += '.'
138        if i & space:
139            r += ' '
140        i >>= 1
141    return r
142
143
144def is_pow2(x):
145    """Return true iff X is equal to a power of 2."""
146    return (x & (x - 1)) == 0
147
148
149def ctz(x):
150    """Return the number of times 2 factors into X."""
151    assert x != 0
152    r = 0
153    while ((x >> r) & 1) == 0:
154        r += 1
155    return r
156
157
158def is_contiguous(bits):
159    if bits == 0:
160        return -1
161    shift = ctz(bits)
162    if is_pow2((bits >> shift) + 1):
163        return shift
164    else:
165        return -1
166
167
168def eq_fields_for_args(flds_a, arg):
169    if len(flds_a) != len(arg.fields):
170        return False
171    # Only allow inference on default types
172    for t in arg.types:
173        if t != 'int':
174            return False
175    for k, a in flds_a.items():
176        if k not in arg.fields:
177            return False
178    return True
179
180
181def eq_fields_for_fmts(flds_a, flds_b):
182    if len(flds_a) != len(flds_b):
183        return False
184    for k, a in flds_a.items():
185        if k not in flds_b:
186            return False
187        b = flds_b[k]
188        if a.__class__ != b.__class__ or a != b:
189            return False
190    return True
191
192
193class Field:
194    """Class representing a simple instruction field"""
195    def __init__(self, sign, pos, len):
196        self.sign = sign
197        self.pos = pos
198        self.len = len
199        self.mask = ((1 << len) - 1) << pos
200
201    def __str__(self):
202        if self.sign:
203            s = 's'
204        else:
205            s = ''
206        return str(self.pos) + ':' + s + str(self.len)
207
208    def str_extract(self):
209        global bitop_width
210        s = 's' if self.sign else ''
211        return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
212
213    def __eq__(self, other):
214        return self.sign == other.sign and self.mask == other.mask
215
216    def __ne__(self, other):
217        return not self.__eq__(other)
218# end Field
219
220
221class MultiField:
222    """Class representing a compound instruction field"""
223    def __init__(self, subs, mask):
224        self.subs = subs
225        self.sign = subs[0].sign
226        self.mask = mask
227
228    def __str__(self):
229        return str(self.subs)
230
231    def str_extract(self):
232        global bitop_width
233        ret = '0'
234        pos = 0
235        for f in reversed(self.subs):
236            ext = f.str_extract()
237            if pos == 0:
238                ret = ext
239            else:
240                ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
241            pos += f.len
242        return ret
243
244    def __ne__(self, other):
245        if len(self.subs) != len(other.subs):
246            return True
247        for a, b in zip(self.subs, other.subs):
248            if a.__class__ != b.__class__ or a != b:
249                return True
250        return False
251
252    def __eq__(self, other):
253        return not self.__ne__(other)
254# end MultiField
255
256
257class ConstField:
258    """Class representing an argument field with constant value"""
259    def __init__(self, value):
260        self.value = value
261        self.mask = 0
262        self.sign = value < 0
263
264    def __str__(self):
265        return str(self.value)
266
267    def str_extract(self):
268        return str(self.value)
269
270    def __cmp__(self, other):
271        return self.value - other.value
272# end ConstField
273
274
275class FunctionField:
276    """Class representing a field passed through a function"""
277    def __init__(self, func, base):
278        self.mask = base.mask
279        self.sign = base.sign
280        self.base = base
281        self.func = func
282
283    def __str__(self):
284        return self.func + '(' + str(self.base) + ')'
285
286    def str_extract(self):
287        return self.func + '(ctx, ' + self.base.str_extract() + ')'
288
289    def __eq__(self, other):
290        return self.func == other.func and self.base == other.base
291
292    def __ne__(self, other):
293        return not self.__eq__(other)
294# end FunctionField
295
296
297class ParameterField:
298    """Class representing a pseudo-field read from a function"""
299    def __init__(self, func):
300        self.mask = 0
301        self.sign = 0
302        self.func = func
303
304    def __str__(self):
305        return self.func
306
307    def str_extract(self):
308        return self.func + '(ctx)'
309
310    def __eq__(self, other):
311        return self.func == other.func
312
313    def __ne__(self, other):
314        return not self.__eq__(other)
315# end ParameterField
316
317
318class Arguments:
319    """Class representing the extracted fields of a format"""
320    def __init__(self, nm, flds, types, extern):
321        self.name = nm
322        self.extern = extern
323        self.fields = flds
324        self.types = types
325
326    def __str__(self):
327        return self.name + ' ' + str(self.fields)
328
329    def struct_name(self):
330        return 'arg_' + self.name
331
332    def output_def(self):
333        if not self.extern:
334            output('typedef struct {\n')
335            for (n, t) in zip(self.fields, self.types):
336                output(f'    {t} {n};\n')
337            output('} ', self.struct_name(), ';\n\n')
338# end Arguments
339
340
341class General:
342    """Common code between instruction formats and instruction patterns"""
343    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
344        self.name = name
345        self.file = input_file
346        self.lineno = lineno
347        self.base = base
348        self.fixedbits = fixb
349        self.fixedmask = fixm
350        self.undefmask = udfm
351        self.fieldmask = fldm
352        self.fields = flds
353        self.width = w
354
355    def __str__(self):
356        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
357
358    def str1(self, i):
359        return str_indent(i) + self.__str__()
360# end General
361
362
363class Format(General):
364    """Class representing an instruction format"""
365
366    def extract_name(self):
367        global decode_function
368        return decode_function + '_extract_' + self.name
369
370    def output_extract(self):
371        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
372               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
373        for n, f in self.fields.items():
374            output('    a->', n, ' = ', f.str_extract(), ';\n')
375        output('}\n\n')
376# end Format
377
378
379class Pattern(General):
380    """Class representing an instruction pattern"""
381
382    def output_decl(self):
383        global translate_scope
384        global translate_prefix
385        output('typedef ', self.base.base.struct_name(),
386               ' arg_', self.name, ';\n')
387        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
388               '(DisasContext *ctx, arg_', self.name, ' *a);\n')
389
390    def output_code(self, i, extracted, outerbits, outermask):
391        global translate_prefix
392        ind = str_indent(i)
393        arg = self.base.base.name
394        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
395        if not extracted:
396            output(ind, self.base.extract_name(),
397                   '(ctx, &u.f_', arg, ', insn);\n')
398        for n, f in self.fields.items():
399            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
400        output(ind, 'if (', translate_prefix, '_', self.name,
401               '(ctx, &u.f_', arg, ')) return true;\n')
402
403    # Normal patterns do not have children.
404    def build_tree(self):
405        return
406    def prop_masks(self):
407        return
408    def prop_format(self):
409        return
410    def prop_width(self):
411        return
412
413# end Pattern
414
415
416class MultiPattern(General):
417    """Class representing a set of instruction patterns"""
418
419    def __init__(self, lineno):
420        self.file = input_file
421        self.lineno = lineno
422        self.pats = []
423        self.base = None
424        self.fixedbits = 0
425        self.fixedmask = 0
426        self.undefmask = 0
427        self.width = None
428
429    def __str__(self):
430        r = 'group'
431        if self.fixedbits is not None:
432            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
433        return r
434
435    def output_decl(self):
436        for p in self.pats:
437            p.output_decl()
438
439    def prop_masks(self):
440        global insnmask
441
442        fixedmask = insnmask
443        undefmask = insnmask
444
445        # Collect fixedmask/undefmask for all of the children.
446        for p in self.pats:
447            p.prop_masks()
448            fixedmask &= p.fixedmask
449            undefmask &= p.undefmask
450
451        # Widen fixedmask until all fixedbits match
452        repeat = True
453        fixedbits = 0
454        while repeat and fixedmask != 0:
455            fixedbits = None
456            for p in self.pats:
457                thisbits = p.fixedbits & fixedmask
458                if fixedbits is None:
459                    fixedbits = thisbits
460                elif fixedbits != thisbits:
461                    fixedmask &= ~(fixedbits ^ thisbits)
462                    break
463            else:
464                repeat = False
465
466        self.fixedbits = fixedbits
467        self.fixedmask = fixedmask
468        self.undefmask = undefmask
469
470    def build_tree(self):
471        for p in self.pats:
472            p.build_tree()
473
474    def prop_format(self):
475        for p in self.pats:
476            p.build_tree()
477
478    def prop_width(self):
479        width = None
480        for p in self.pats:
481            p.prop_width()
482            if width is None:
483                width = p.width
484            elif width != p.width:
485                error_with_file(self.file, self.lineno,
486                                'width mismatch in patterns within braces')
487        self.width = width
488
489# end MultiPattern
490
491
492class IncMultiPattern(MultiPattern):
493    """Class representing an overlapping set of instruction patterns"""
494
495    def output_code(self, i, extracted, outerbits, outermask):
496        global translate_prefix
497        ind = str_indent(i)
498        for p in self.pats:
499            if outermask != p.fixedmask:
500                innermask = p.fixedmask & ~outermask
501                innerbits = p.fixedbits & ~outermask
502                output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
503                output(ind, f'    /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
504                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
505                output(ind, '}\n')
506            else:
507                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
508#end IncMultiPattern
509
510
511class Tree:
512    """Class representing a node in a decode tree"""
513
514    def __init__(self, fm, tm):
515        self.fixedmask = fm
516        self.thismask = tm
517        self.subs = []
518        self.base = None
519
520    def str1(self, i):
521        ind = str_indent(i)
522        r = ind + whex(self.fixedmask)
523        if self.format:
524            r += ' ' + self.format.name
525        r += ' [\n'
526        for (b, s) in self.subs:
527            r += ind + f'  {whex(b)}:\n'
528            r += s.str1(i + 4) + '\n'
529        r += ind + ']'
530        return r
531
532    def __str__(self):
533        return self.str1(0)
534
535    def output_code(self, i, extracted, outerbits, outermask):
536        ind = str_indent(i)
537
538        # If we identified all nodes below have the same format,
539        # extract the fields now.
540        if not extracted and self.base:
541            output(ind, self.base.extract_name(),
542                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
543            extracted = True
544
545        # Attempt to aid the compiler in producing compact switch statements.
546        # If the bits in the mask are contiguous, extract them.
547        sh = is_contiguous(self.thismask)
548        if sh > 0:
549            # Propagate SH down into the local functions.
550            def str_switch(b, sh=sh):
551                return f'(insn >> {sh}) & {b >> sh:#x}'
552
553            def str_case(b, sh=sh):
554                return hex(b >> sh)
555        else:
556            def str_switch(b):
557                return f'insn & {whexC(b)}'
558
559            def str_case(b):
560                return whexC(b)
561
562        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
563        for b, s in sorted(self.subs):
564            assert (self.thismask & ~s.fixedmask) == 0
565            innermask = outermask | self.thismask
566            innerbits = outerbits | b
567            output(ind, 'case ', str_case(b), ':\n')
568            output(ind, '    /* ',
569                   str_match_bits(innerbits, innermask), ' */\n')
570            s.output_code(i + 4, extracted, innerbits, innermask)
571            output(ind, '    break;\n')
572        output(ind, '}\n')
573# end Tree
574
575
576class ExcMultiPattern(MultiPattern):
577    """Class representing a non-overlapping set of instruction patterns"""
578
579    def output_code(self, i, extracted, outerbits, outermask):
580        # Defer everything to our decomposed Tree node
581        self.tree.output_code(i, extracted, outerbits, outermask)
582
583    @staticmethod
584    def __build_tree(pats, outerbits, outermask):
585        # Find the intersection of all remaining fixedmask.
586        innermask = ~outermask & insnmask
587        for i in pats:
588            innermask &= i.fixedmask
589
590        if innermask == 0:
591            # Edge condition: One pattern covers the entire insnmask
592            if len(pats) == 1:
593                t = Tree(outermask, innermask)
594                t.subs.append((0, pats[0]))
595                return t
596
597            text = 'overlapping patterns:'
598            for p in pats:
599                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
600            error_with_file(pats[0].file, pats[0].lineno, text)
601
602        fullmask = outermask | innermask
603
604        # Sort each element of pats into the bin selected by the mask.
605        bins = {}
606        for i in pats:
607            fb = i.fixedbits & innermask
608            if fb in bins:
609                bins[fb].append(i)
610            else:
611                bins[fb] = [i]
612
613        # We must recurse if any bin has more than one element or if
614        # the single element in the bin has not been fully matched.
615        t = Tree(fullmask, innermask)
616
617        for b, l in bins.items():
618            s = l[0]
619            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
620                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
621            t.subs.append((b, s))
622
623        return t
624
625    def build_tree(self):
626        super().prop_format()
627        self.tree = self.__build_tree(self.pats, self.fixedbits,
628                                      self.fixedmask)
629
630    @staticmethod
631    def __prop_format(tree):
632        """Propagate Format objects into the decode tree"""
633
634        # Depth first search.
635        for (b, s) in tree.subs:
636            if isinstance(s, Tree):
637                ExcMultiPattern.__prop_format(s)
638
639        # If all entries in SUBS have the same format, then
640        # propagate that into the tree.
641        f = None
642        for (b, s) in tree.subs:
643            if f is None:
644                f = s.base
645                if f is None:
646                    return
647            if f is not s.base:
648                return
649        tree.base = f
650
651    def prop_format(self):
652        super().prop_format()
653        self.__prop_format(self.tree)
654
655# end ExcMultiPattern
656
657
658def parse_field(lineno, name, toks):
659    """Parse one instruction field from TOKS at LINENO"""
660    global fields
661    global insnwidth
662
663    # A "simple" field will have only one entry;
664    # a "multifield" will have several.
665    subs = []
666    width = 0
667    func = None
668    for t in toks:
669        if re.match('^!function=', t):
670            if func:
671                error(lineno, 'duplicate function')
672            func = t.split('=')
673            func = func[1]
674            continue
675
676        if re.fullmatch('[0-9]+:s[0-9]+', t):
677            # Signed field extract
678            subtoks = t.split(':s')
679            sign = True
680        elif re.fullmatch('[0-9]+:[0-9]+', t):
681            # Unsigned field extract
682            subtoks = t.split(':')
683            sign = False
684        else:
685            error(lineno, f'invalid field token "{t}"')
686        po = int(subtoks[0])
687        le = int(subtoks[1])
688        if po + le > insnwidth:
689            error(lineno, f'field {t} too large')
690        f = Field(sign, po, le)
691        subs.append(f)
692        width += le
693
694    if width > insnwidth:
695        error(lineno, 'field too large')
696    if len(subs) == 0:
697        if func:
698            f = ParameterField(func)
699        else:
700            error(lineno, 'field with no value')
701    else:
702        if len(subs) == 1:
703            f = subs[0]
704        else:
705            mask = 0
706            for s in subs:
707                if mask & s.mask:
708                    error(lineno, 'field components overlap')
709                mask |= s.mask
710            f = MultiField(subs, mask)
711        if func:
712            f = FunctionField(func, f)
713
714    if name in fields:
715        error(lineno, 'duplicate field', name)
716    fields[name] = f
717# end parse_field
718
719
720def parse_arguments(lineno, name, toks):
721    """Parse one argument set from TOKS at LINENO"""
722    global arguments
723    global re_C_ident
724    global anyextern
725
726    flds = []
727    types = []
728    extern = False
729    for n in toks:
730        if re.fullmatch('!extern', n):
731            extern = True
732            anyextern = True
733            continue
734        if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
735            (n, t) = n.split(':')
736        elif re.fullmatch(re_C_ident, n):
737            t = 'int'
738        else:
739            error(lineno, f'invalid argument set token "{n}"')
740        if n in flds:
741            error(lineno, f'duplicate argument "{n}"')
742        flds.append(n)
743        types.append(t)
744
745    if name in arguments:
746        error(lineno, 'duplicate argument set', name)
747    arguments[name] = Arguments(name, flds, types, extern)
748# end parse_arguments
749
750
751def lookup_field(lineno, name):
752    global fields
753    if name in fields:
754        return fields[name]
755    error(lineno, 'undefined field', name)
756
757
758def add_field(lineno, flds, new_name, f):
759    if new_name in flds:
760        error(lineno, 'duplicate field', new_name)
761    flds[new_name] = f
762    return flds
763
764
765def add_field_byname(lineno, flds, new_name, old_name):
766    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
767
768
769def infer_argument_set(flds):
770    global arguments
771    global decode_function
772
773    for arg in arguments.values():
774        if eq_fields_for_args(flds, arg):
775            return arg
776
777    name = decode_function + str(len(arguments))
778    arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
779    arguments[name] = arg
780    return arg
781
782
783def infer_format(arg, fieldmask, flds, width):
784    global arguments
785    global formats
786    global decode_function
787
788    const_flds = {}
789    var_flds = {}
790    for n, c in flds.items():
791        if c is ConstField:
792            const_flds[n] = c
793        else:
794            var_flds[n] = c
795
796    # Look for an existing format with the same argument set and fields
797    for fmt in formats.values():
798        if arg and fmt.base != arg:
799            continue
800        if fieldmask != fmt.fieldmask:
801            continue
802        if width != fmt.width:
803            continue
804        if not eq_fields_for_fmts(flds, fmt.fields):
805            continue
806        return (fmt, const_flds)
807
808    name = decode_function + '_Fmt_' + str(len(formats))
809    if not arg:
810        arg = infer_argument_set(flds)
811
812    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
813    formats[name] = fmt
814
815    return (fmt, const_flds)
816# end infer_format
817
818
819def parse_generic(lineno, parent_pat, name, toks):
820    """Parse one instruction format from TOKS at LINENO"""
821    global fields
822    global arguments
823    global formats
824    global allpatterns
825    global re_arg_ident
826    global re_fld_ident
827    global re_fmt_ident
828    global re_C_ident
829    global insnwidth
830    global insnmask
831    global variablewidth
832
833    is_format = parent_pat is None
834
835    fixedmask = 0
836    fixedbits = 0
837    undefmask = 0
838    width = 0
839    flds = {}
840    arg = None
841    fmt = None
842    for t in toks:
843        # '&Foo' gives a format an explicit argument set.
844        if re.fullmatch(re_arg_ident, t):
845            tt = t[1:]
846            if arg:
847                error(lineno, 'multiple argument sets')
848            if tt in arguments:
849                arg = arguments[tt]
850            else:
851                error(lineno, 'undefined argument set', t)
852            continue
853
854        # '@Foo' gives a pattern an explicit format.
855        if re.fullmatch(re_fmt_ident, t):
856            tt = t[1:]
857            if fmt:
858                error(lineno, 'multiple formats')
859            if tt in formats:
860                fmt = formats[tt]
861            else:
862                error(lineno, 'undefined format', t)
863            continue
864
865        # '%Foo' imports a field.
866        if re.fullmatch(re_fld_ident, t):
867            tt = t[1:]
868            flds = add_field_byname(lineno, flds, tt, tt)
869            continue
870
871        # 'Foo=%Bar' imports a field with a different name.
872        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
873            (fname, iname) = t.split('=%')
874            flds = add_field_byname(lineno, flds, fname, iname)
875            continue
876
877        # 'Foo=number' sets an argument field to a constant value
878        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
879            (fname, value) = t.split('=')
880            value = int(value)
881            flds = add_field(lineno, flds, fname, ConstField(value))
882            continue
883
884        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
885        # required ones, or dont-cares.
886        if re.fullmatch('[01.-]+', t):
887            shift = len(t)
888            fms = t.replace('0', '1')
889            fms = fms.replace('.', '0')
890            fms = fms.replace('-', '0')
891            fbs = t.replace('.', '0')
892            fbs = fbs.replace('-', '0')
893            ubm = t.replace('1', '0')
894            ubm = ubm.replace('.', '0')
895            ubm = ubm.replace('-', '1')
896            fms = int(fms, 2)
897            fbs = int(fbs, 2)
898            ubm = int(ubm, 2)
899            fixedbits = (fixedbits << shift) | fbs
900            fixedmask = (fixedmask << shift) | fms
901            undefmask = (undefmask << shift) | ubm
902        # Otherwise, fieldname:fieldwidth
903        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
904            (fname, flen) = t.split(':')
905            sign = False
906            if flen[0] == 's':
907                sign = True
908                flen = flen[1:]
909            shift = int(flen, 10)
910            if shift + width > insnwidth:
911                error(lineno, f'field {fname} exceeds insnwidth')
912            f = Field(sign, insnwidth - width - shift, shift)
913            flds = add_field(lineno, flds, fname, f)
914            fixedbits <<= shift
915            fixedmask <<= shift
916            undefmask <<= shift
917        else:
918            error(lineno, f'invalid token "{t}"')
919        width += shift
920
921    if variablewidth and width < insnwidth and width % 8 == 0:
922        shift = insnwidth - width
923        fixedbits <<= shift
924        fixedmask <<= shift
925        undefmask <<= shift
926        undefmask |= (1 << shift) - 1
927
928    # We should have filled in all of the bits of the instruction.
929    elif not (is_format and width == 0) and width != insnwidth:
930        error(lineno, f'definition has {width} bits')
931
932    # Do not check for fields overlapping fields; one valid usage
933    # is to be able to duplicate fields via import.
934    fieldmask = 0
935    for f in flds.values():
936        fieldmask |= f.mask
937
938    # Fix up what we've parsed to match either a format or a pattern.
939    if is_format:
940        # Formats cannot reference formats.
941        if fmt:
942            error(lineno, 'format referencing format')
943        # If an argument set is given, then there should be no fields
944        # without a place to store it.
945        if arg:
946            for f in flds.keys():
947                if f not in arg.fields:
948                    error(lineno, f'field {f} not in argument set {arg.name}')
949        else:
950            arg = infer_argument_set(flds)
951        if name in formats:
952            error(lineno, 'duplicate format name', name)
953        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
954                     undefmask, fieldmask, flds, width)
955        formats[name] = fmt
956    else:
957        # Patterns can reference a format ...
958        if fmt:
959            # ... but not an argument simultaneously
960            if arg:
961                error(lineno, 'pattern specifies both format and argument set')
962            if fixedmask & fmt.fixedmask:
963                error(lineno, 'pattern fixed bits overlap format fixed bits')
964            if width != fmt.width:
965                error(lineno, 'pattern uses format of different width')
966            fieldmask |= fmt.fieldmask
967            fixedbits |= fmt.fixedbits
968            fixedmask |= fmt.fixedmask
969            undefmask |= fmt.undefmask
970        else:
971            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
972        arg = fmt.base
973        for f in flds.keys():
974            if f not in arg.fields:
975                error(lineno, f'field {f} not in argument set {arg.name}')
976            if f in fmt.fields.keys():
977                error(lineno, f'field {f} set by format and pattern')
978        for f in arg.fields:
979            if f not in flds.keys() and f not in fmt.fields.keys():
980                error(lineno, f'field {f} not initialized')
981        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
982                      undefmask, fieldmask, flds, width)
983        parent_pat.pats.append(pat)
984        allpatterns.append(pat)
985
986    # Validate the masks that we have assembled.
987    if fieldmask & fixedmask:
988        error(lineno, 'fieldmask overlaps fixedmask ',
989              f'({whex(fieldmask)} & {whex(fixedmask)})')
990    if fieldmask & undefmask:
991        error(lineno, 'fieldmask overlaps undefmask ',
992              f'({whex(fieldmask)} & {whex(undefmask)})')
993    if fixedmask & undefmask:
994        error(lineno, 'fixedmask overlaps undefmask ',
995              f'({whex(fixedmask)} & {whex(undefmask)})')
996    if not is_format:
997        allbits = fieldmask | fixedmask | undefmask
998        if allbits != insnmask:
999            error(lineno, 'bits left unspecified ',
1000                  f'({whex(allbits ^ insnmask)})')
1001# end parse_general
1002
1003
1004def parse_file(f, parent_pat):
1005    """Parse all of the patterns within a file"""
1006    global re_arg_ident
1007    global re_fld_ident
1008    global re_fmt_ident
1009    global re_pat_ident
1010
1011    # Read all of the lines of the file.  Concatenate lines
1012    # ending in backslash; discard empty lines and comments.
1013    toks = []
1014    lineno = 0
1015    nesting = 0
1016    nesting_pats = []
1017
1018    for line in f:
1019        lineno += 1
1020
1021        # Expand and strip spaces, to find indent.
1022        line = line.rstrip()
1023        line = line.expandtabs()
1024        len1 = len(line)
1025        line = line.lstrip()
1026        len2 = len(line)
1027
1028        # Discard comments
1029        end = line.find('#')
1030        if end >= 0:
1031            line = line[:end]
1032
1033        t = line.split()
1034        if len(toks) != 0:
1035            # Next line after continuation
1036            toks.extend(t)
1037        else:
1038            # Allow completely blank lines.
1039            if len1 == 0:
1040                continue
1041            indent = len1 - len2
1042            # Empty line due to comment.
1043            if len(t) == 0:
1044                # Indentation must be correct, even for comment lines.
1045                if indent != nesting:
1046                    error(lineno, 'indentation ', indent, ' != ', nesting)
1047                continue
1048            start_lineno = lineno
1049            toks = t
1050
1051        # Continuation?
1052        if toks[-1] == '\\':
1053            toks.pop()
1054            continue
1055
1056        name = toks[0]
1057        del toks[0]
1058
1059        # End nesting?
1060        if name == '}' or name == ']':
1061            if len(toks) != 0:
1062                error(start_lineno, 'extra tokens after close brace')
1063
1064            # Make sure { } and [ ] nest properly.
1065            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
1066                error(lineno, 'mismatched close brace')
1067
1068            try:
1069                parent_pat = nesting_pats.pop()
1070            except:
1071                error(lineno, 'extra close brace')
1072
1073            nesting -= 2
1074            if indent != nesting:
1075                error(lineno, 'indentation ', indent, ' != ', nesting)
1076
1077            toks = []
1078            continue
1079
1080        # Everything else should have current indentation.
1081        if indent != nesting:
1082            error(start_lineno, 'indentation ', indent, ' != ', nesting)
1083
1084        # Start nesting?
1085        if name == '{' or name == '[':
1086            if len(toks) != 0:
1087                error(start_lineno, 'extra tokens after open brace')
1088
1089            if name == '{':
1090                nested_pat = IncMultiPattern(start_lineno)
1091            else:
1092                nested_pat = ExcMultiPattern(start_lineno)
1093            parent_pat.pats.append(nested_pat)
1094            nesting_pats.append(parent_pat)
1095            parent_pat = nested_pat
1096
1097            nesting += 2
1098            toks = []
1099            continue
1100
1101        # Determine the type of object needing to be parsed.
1102        if re.fullmatch(re_fld_ident, name):
1103            parse_field(start_lineno, name[1:], toks)
1104        elif re.fullmatch(re_arg_ident, name):
1105            parse_arguments(start_lineno, name[1:], toks)
1106        elif re.fullmatch(re_fmt_ident, name):
1107            parse_generic(start_lineno, None, name[1:], toks)
1108        elif re.fullmatch(re_pat_ident, name):
1109            parse_generic(start_lineno, parent_pat, name, toks)
1110        else:
1111            error(lineno, f'invalid token "{name}"')
1112        toks = []
1113
1114    if nesting != 0:
1115        error(lineno, 'missing close brace')
1116# end parse_file
1117
1118
1119class SizeTree:
1120    """Class representing a node in a size decode tree"""
1121
1122    def __init__(self, m, w):
1123        self.mask = m
1124        self.subs = []
1125        self.base = None
1126        self.width = w
1127
1128    def str1(self, i):
1129        ind = str_indent(i)
1130        r = ind + whex(self.mask) + ' [\n'
1131        for (b, s) in self.subs:
1132            r += ind + f'  {whex(b)}:\n'
1133            r += s.str1(i + 4) + '\n'
1134        r += ind + ']'
1135        return r
1136
1137    def __str__(self):
1138        return self.str1(0)
1139
1140    def output_code(self, i, extracted, outerbits, outermask):
1141        ind = str_indent(i)
1142
1143        # If we need to load more bytes to test, do so now.
1144        if extracted < self.width:
1145            output(ind, f'insn = {decode_function}_load_bytes',
1146                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1147            extracted = self.width
1148
1149        # Attempt to aid the compiler in producing compact switch statements.
1150        # If the bits in the mask are contiguous, extract them.
1151        sh = is_contiguous(self.mask)
1152        if sh > 0:
1153            # Propagate SH down into the local functions.
1154            def str_switch(b, sh=sh):
1155                return f'(insn >> {sh}) & {b >> sh:#x}'
1156
1157            def str_case(b, sh=sh):
1158                return hex(b >> sh)
1159        else:
1160            def str_switch(b):
1161                return f'insn & {whexC(b)}'
1162
1163            def str_case(b):
1164                return whexC(b)
1165
1166        output(ind, 'switch (', str_switch(self.mask), ') {\n')
1167        for b, s in sorted(self.subs):
1168            innermask = outermask | self.mask
1169            innerbits = outerbits | b
1170            output(ind, 'case ', str_case(b), ':\n')
1171            output(ind, '    /* ',
1172                   str_match_bits(innerbits, innermask), ' */\n')
1173            s.output_code(i + 4, extracted, innerbits, innermask)
1174        output(ind, '}\n')
1175        output(ind, 'return insn;\n')
1176# end SizeTree
1177
1178class SizeLeaf:
1179    """Class representing a leaf node in a size decode tree"""
1180
1181    def __init__(self, m, w):
1182        self.mask = m
1183        self.width = w
1184
1185    def str1(self, i):
1186        return str_indent(i) + whex(self.mask)
1187
1188    def __str__(self):
1189        return self.str1(0)
1190
1191    def output_code(self, i, extracted, outerbits, outermask):
1192        global decode_function
1193        ind = str_indent(i)
1194
1195        # If we need to load more bytes, do so now.
1196        if extracted < self.width:
1197            output(ind, f'insn = {decode_function}_load_bytes',
1198                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1199            extracted = self.width
1200        output(ind, 'return insn;\n')
1201# end SizeLeaf
1202
1203
1204def build_size_tree(pats, width, outerbits, outermask):
1205    global insnwidth
1206
1207    # Collect the mask of bits that are fixed in this width
1208    innermask = 0xff << (insnwidth - width)
1209    innermask &= ~outermask
1210    minwidth = None
1211    onewidth = True
1212    for i in pats:
1213        innermask &= i.fixedmask
1214        if minwidth is None:
1215            minwidth = i.width
1216        elif minwidth != i.width:
1217            onewidth = False;
1218            if minwidth < i.width:
1219                minwidth = i.width
1220
1221    if onewidth:
1222        return SizeLeaf(innermask, minwidth)
1223
1224    if innermask == 0:
1225        if width < minwidth:
1226            return build_size_tree(pats, width + 8, outerbits, outermask)
1227
1228        pnames = []
1229        for p in pats:
1230            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
1231        error_with_file(pats[0].file, pats[0].lineno,
1232                        f'overlapping patterns size {width}:', pnames)
1233
1234    bins = {}
1235    for i in pats:
1236        fb = i.fixedbits & innermask
1237        if fb in bins:
1238            bins[fb].append(i)
1239        else:
1240            bins[fb] = [i]
1241
1242    fullmask = outermask | innermask
1243    lens = sorted(bins.keys())
1244    if len(lens) == 1:
1245        b = lens[0]
1246        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
1247
1248    r = SizeTree(innermask, width)
1249    for b, l in bins.items():
1250        s = build_size_tree(l, width, b | outerbits, fullmask)
1251        r.subs.append((b, s))
1252    return r
1253# end build_size_tree
1254
1255
1256def prop_size(tree):
1257    """Propagate minimum widths up the decode size tree"""
1258
1259    if isinstance(tree, SizeTree):
1260        min = None
1261        for (b, s) in tree.subs:
1262            width = prop_size(s)
1263            if min is None or min > width:
1264                min = width
1265        assert min >= tree.width
1266        tree.width = min
1267    else:
1268        min = tree.width
1269    return min
1270# end prop_size
1271
1272
1273def main():
1274    global arguments
1275    global formats
1276    global allpatterns
1277    global translate_scope
1278    global translate_prefix
1279    global output_fd
1280    global output_file
1281    global input_file
1282    global insnwidth
1283    global insntype
1284    global insnmask
1285    global decode_function
1286    global bitop_width
1287    global variablewidth
1288    global anyextern
1289
1290    decode_scope = 'static '
1291
1292    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
1293                 'static-decode=', 'varinsnwidth=']
1294    try:
1295        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
1296    except getopt.GetoptError as err:
1297        error(0, err)
1298    for o, a in opts:
1299        if o in ('-o', '--output'):
1300            output_file = a
1301        elif o == '--decode':
1302            decode_function = a
1303            decode_scope = ''
1304        elif o == '--static-decode':
1305            decode_function = a
1306        elif o == '--translate':
1307            translate_prefix = a
1308            translate_scope = ''
1309        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
1310            if o == '--varinsnwidth':
1311                variablewidth = True
1312            insnwidth = int(a)
1313            if insnwidth == 16:
1314                insntype = 'uint16_t'
1315                insnmask = 0xffff
1316            elif insnwidth == 64:
1317                insntype = 'uint64_t'
1318                insnmask = 0xffffffffffffffff
1319                bitop_width = 64
1320            elif insnwidth != 32:
1321                error(0, 'cannot handle insns of width', insnwidth)
1322        else:
1323            assert False, 'unhandled option'
1324
1325    if len(args) < 1:
1326        error(0, 'missing input file')
1327
1328    toppat = ExcMultiPattern(0)
1329
1330    for filename in args:
1331        input_file = filename
1332        f = open(filename, 'rt', encoding='utf-8')
1333        parse_file(f, toppat)
1334        f.close()
1335
1336    # We do not want to compute masks for toppat, because those masks
1337    # are used as a starting point for build_tree.  For toppat, we must
1338    # insist that decode begins from naught.
1339    for i in toppat.pats:
1340        i.prop_masks()
1341
1342    toppat.build_tree()
1343    toppat.prop_format()
1344
1345    if variablewidth:
1346        for i in toppat.pats:
1347            i.prop_width()
1348        stree = build_size_tree(toppat.pats, 8, 0, 0)
1349        prop_size(stree)
1350
1351    if output_file:
1352        output_fd = open(output_file, 'wt', encoding='utf-8')
1353    else:
1354        output_fd = io.TextIOWrapper(sys.stdout.buffer,
1355                                     encoding=sys.stdout.encoding,
1356                                     errors="ignore")
1357
1358    output_autogen()
1359    for n in sorted(arguments.keys()):
1360        f = arguments[n]
1361        f.output_def()
1362
1363    # A single translate function can be invoked for different patterns.
1364    # Make sure that the argument sets are the same, and declare the
1365    # function only once.
1366    #
1367    # If we're sharing formats, we're likely also sharing trans_* functions,
1368    # but we can't tell which ones.  Prevent issues from the compiler by
1369    # suppressing redundant declaration warnings.
1370    if anyextern:
1371        output("#pragma GCC diagnostic push\n",
1372               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1373               "#ifdef __clang__\n"
1374               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1375               "#endif\n\n")
1376
1377    out_pats = {}
1378    for i in allpatterns:
1379        if i.name in out_pats:
1380            p = out_pats[i.name]
1381            if i.base.base != p.base.base:
1382                error(0, i.name, ' has conflicting argument sets')
1383        else:
1384            i.output_decl()
1385            out_pats[i.name] = i
1386    output('\n')
1387
1388    if anyextern:
1389        output("#pragma GCC diagnostic pop\n\n")
1390
1391    for n in sorted(formats.keys()):
1392        f = formats[n]
1393        f.output_extract()
1394
1395    output(decode_scope, 'bool ', decode_function,
1396           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
1397
1398    i4 = str_indent(4)
1399
1400    if len(allpatterns) != 0:
1401        output(i4, 'union {\n')
1402        for n in sorted(arguments.keys()):
1403            f = arguments[n]
1404            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
1405        output(i4, '} u;\n\n')
1406        toppat.output_code(4, False, 0, 0)
1407
1408    output(i4, 'return false;\n')
1409    output('}\n')
1410
1411    if variablewidth:
1412        output('\n', decode_scope, insntype, ' ', decode_function,
1413               '_load(DisasContext *ctx)\n{\n',
1414               '    ', insntype, ' insn = 0;\n\n')
1415        stree.output_code(4, 0, 0, 0)
1416        output('}\n')
1417
1418    if output_file:
1419        output_fd.close()
1420# end main
1421
1422
1423if __name__ == '__main__':
1424    main()
1425