xref: /qemu/scripts/decodetree.py (revision 138ca49a)
1#!/usr/bin/env python3
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2.1 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20# See the syntax and semantics in docs/devel/decodetree.rst.
21#
22
23import io
24import os
25import re
26import sys
27import getopt
28
29insnwidth = 32
30insnmask = 0xffffffff
31variablewidth = False
32fields = {}
33arguments = {}
34formats = {}
35allpatterns = []
36anyextern = False
37
38translate_prefix = 'trans'
39translate_scope = 'static '
40input_file = ''
41output_file = None
42output_fd = None
43insntype = 'uint32_t'
44decode_function = 'decode'
45
46# An identifier for C.
47re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
48
49# Identifiers for Arguments, Fields, Formats and Patterns.
50re_arg_ident = '&[a-zA-Z0-9_]*'
51re_fld_ident = '%[a-zA-Z0-9_]*'
52re_fmt_ident = '@[a-zA-Z0-9_]*'
53re_pat_ident = '[a-zA-Z0-9_]*'
54
55def error_with_file(file, lineno, *args):
56    """Print an error message from file:line and args and exit."""
57    global output_file
58    global output_fd
59
60    prefix = ''
61    if file:
62        prefix += '{0}:'.format(file)
63    if lineno:
64        prefix += '{0}:'.format(lineno)
65    if prefix:
66        prefix += ' '
67    print(prefix, end='error: ', file=sys.stderr)
68    print(*args, file=sys.stderr)
69
70    if output_file and output_fd:
71        output_fd.close()
72        os.remove(output_file)
73    exit(1)
74# end error_with_file
75
76
77def error(lineno, *args):
78    error_with_file(input_file, lineno, *args)
79# end error
80
81
82def output(*args):
83    global output_fd
84    for a in args:
85        output_fd.write(a)
86
87
88def output_autogen():
89    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
90
91
92def str_indent(c):
93    """Return a string with C spaces"""
94    return ' ' * c
95
96
97def str_fields(fields):
98    """Return a string uniquely identifying FIELDS"""
99    r = ''
100    for n in sorted(fields.keys()):
101        r += '_' + n
102    return r[1:]
103
104
105def str_match_bits(bits, mask):
106    """Return a string pretty-printing BITS/MASK"""
107    global insnwidth
108
109    i = 1 << (insnwidth - 1)
110    space = 0x01010100
111    r = ''
112    while i != 0:
113        if i & mask:
114            if i & bits:
115                r += '1'
116            else:
117                r += '0'
118        else:
119            r += '.'
120        if i & space:
121            r += ' '
122        i >>= 1
123    return r
124
125
126def is_pow2(x):
127    """Return true iff X is equal to a power of 2."""
128    return (x & (x - 1)) == 0
129
130
131def ctz(x):
132    """Return the number of times 2 factors into X."""
133    assert x != 0
134    r = 0
135    while ((x >> r) & 1) == 0:
136        r += 1
137    return r
138
139
140def is_contiguous(bits):
141    if bits == 0:
142        return -1
143    shift = ctz(bits)
144    if is_pow2((bits >> shift) + 1):
145        return shift
146    else:
147        return -1
148
149
150def eq_fields_for_args(flds_a, flds_b):
151    if len(flds_a) != len(flds_b):
152        return False
153    for k, a in flds_a.items():
154        if k not in flds_b:
155            return False
156    return True
157
158
159def eq_fields_for_fmts(flds_a, flds_b):
160    if len(flds_a) != len(flds_b):
161        return False
162    for k, a in flds_a.items():
163        if k not in flds_b:
164            return False
165        b = flds_b[k]
166        if a.__class__ != b.__class__ or a != b:
167            return False
168    return True
169
170
171class Field:
172    """Class representing a simple instruction field"""
173    def __init__(self, sign, pos, len):
174        self.sign = sign
175        self.pos = pos
176        self.len = len
177        self.mask = ((1 << len) - 1) << pos
178
179    def __str__(self):
180        if self.sign:
181            s = 's'
182        else:
183            s = ''
184        return str(self.pos) + ':' + s + str(self.len)
185
186    def str_extract(self):
187        if self.sign:
188            extr = 'sextract32'
189        else:
190            extr = 'extract32'
191        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
192
193    def __eq__(self, other):
194        return self.sign == other.sign and self.mask == other.mask
195
196    def __ne__(self, other):
197        return not self.__eq__(other)
198# end Field
199
200
201class MultiField:
202    """Class representing a compound instruction field"""
203    def __init__(self, subs, mask):
204        self.subs = subs
205        self.sign = subs[0].sign
206        self.mask = mask
207
208    def __str__(self):
209        return str(self.subs)
210
211    def str_extract(self):
212        ret = '0'
213        pos = 0
214        for f in reversed(self.subs):
215            if pos == 0:
216                ret = f.str_extract()
217            else:
218                ret = 'deposit32({0}, {1}, {2}, {3})' \
219                      .format(ret, pos, 32 - pos, f.str_extract())
220            pos += f.len
221        return ret
222
223    def __ne__(self, other):
224        if len(self.subs) != len(other.subs):
225            return True
226        for a, b in zip(self.subs, other.subs):
227            if a.__class__ != b.__class__ or a != b:
228                return True
229        return False
230
231    def __eq__(self, other):
232        return not self.__ne__(other)
233# end MultiField
234
235
236class ConstField:
237    """Class representing an argument field with constant value"""
238    def __init__(self, value):
239        self.value = value
240        self.mask = 0
241        self.sign = value < 0
242
243    def __str__(self):
244        return str(self.value)
245
246    def str_extract(self):
247        return str(self.value)
248
249    def __cmp__(self, other):
250        return self.value - other.value
251# end ConstField
252
253
254class FunctionField:
255    """Class representing a field passed through a function"""
256    def __init__(self, func, base):
257        self.mask = base.mask
258        self.sign = base.sign
259        self.base = base
260        self.func = func
261
262    def __str__(self):
263        return self.func + '(' + str(self.base) + ')'
264
265    def str_extract(self):
266        return self.func + '(ctx, ' + self.base.str_extract() + ')'
267
268    def __eq__(self, other):
269        return self.func == other.func and self.base == other.base
270
271    def __ne__(self, other):
272        return not self.__eq__(other)
273# end FunctionField
274
275
276class ParameterField:
277    """Class representing a pseudo-field read from a function"""
278    def __init__(self, func):
279        self.mask = 0
280        self.sign = 0
281        self.func = func
282
283    def __str__(self):
284        return self.func
285
286    def str_extract(self):
287        return self.func + '(ctx)'
288
289    def __eq__(self, other):
290        return self.func == other.func
291
292    def __ne__(self, other):
293        return not self.__eq__(other)
294# end ParameterField
295
296
297class Arguments:
298    """Class representing the extracted fields of a format"""
299    def __init__(self, nm, flds, extern):
300        self.name = nm
301        self.extern = extern
302        self.fields = sorted(flds)
303
304    def __str__(self):
305        return self.name + ' ' + str(self.fields)
306
307    def struct_name(self):
308        return 'arg_' + self.name
309
310    def output_def(self):
311        if not self.extern:
312            output('typedef struct {\n')
313            for n in self.fields:
314                output('    int ', n, ';\n')
315            output('} ', self.struct_name(), ';\n\n')
316# end Arguments
317
318
319class General:
320    """Common code between instruction formats and instruction patterns"""
321    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
322        self.name = name
323        self.file = input_file
324        self.lineno = lineno
325        self.base = base
326        self.fixedbits = fixb
327        self.fixedmask = fixm
328        self.undefmask = udfm
329        self.fieldmask = fldm
330        self.fields = flds
331        self.width = w
332
333    def __str__(self):
334        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
335
336    def str1(self, i):
337        return str_indent(i) + self.__str__()
338# end General
339
340
341class Format(General):
342    """Class representing an instruction format"""
343
344    def extract_name(self):
345        global decode_function
346        return decode_function + '_extract_' + self.name
347
348    def output_extract(self):
349        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
350               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
351        for n, f in self.fields.items():
352            output('    a->', n, ' = ', f.str_extract(), ';\n')
353        output('}\n\n')
354# end Format
355
356
357class Pattern(General):
358    """Class representing an instruction pattern"""
359
360    def output_decl(self):
361        global translate_scope
362        global translate_prefix
363        output('typedef ', self.base.base.struct_name(),
364               ' arg_', self.name, ';\n')
365        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
366               '(DisasContext *ctx, arg_', self.name, ' *a);\n')
367
368    def output_code(self, i, extracted, outerbits, outermask):
369        global translate_prefix
370        ind = str_indent(i)
371        arg = self.base.base.name
372        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
373        if not extracted:
374            output(ind, self.base.extract_name(),
375                   '(ctx, &u.f_', arg, ', insn);\n')
376        for n, f in self.fields.items():
377            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
378        output(ind, 'if (', translate_prefix, '_', self.name,
379               '(ctx, &u.f_', arg, ')) return true;\n')
380
381    # Normal patterns do not have children.
382    def build_tree(self):
383        return
384    def prop_masks(self):
385        return
386    def prop_format(self):
387        return
388    def prop_width(self):
389        return
390
391# end Pattern
392
393
394class MultiPattern(General):
395    """Class representing a set of instruction patterns"""
396
397    def __init__(self, lineno):
398        self.file = input_file
399        self.lineno = lineno
400        self.pats = []
401        self.base = None
402        self.fixedbits = 0
403        self.fixedmask = 0
404        self.undefmask = 0
405        self.width = None
406
407    def __str__(self):
408        r = 'group'
409        if self.fixedbits is not None:
410            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
411        return r
412
413    def output_decl(self):
414        for p in self.pats:
415            p.output_decl()
416
417    def prop_masks(self):
418        global insnmask
419
420        fixedmask = insnmask
421        undefmask = insnmask
422
423        # Collect fixedmask/undefmask for all of the children.
424        for p in self.pats:
425            p.prop_masks()
426            fixedmask &= p.fixedmask
427            undefmask &= p.undefmask
428
429        # Widen fixedmask until all fixedbits match
430        repeat = True
431        fixedbits = 0
432        while repeat and fixedmask != 0:
433            fixedbits = None
434            for p in self.pats:
435                thisbits = p.fixedbits & fixedmask
436                if fixedbits is None:
437                    fixedbits = thisbits
438                elif fixedbits != thisbits:
439                    fixedmask &= ~(fixedbits ^ thisbits)
440                    break
441            else:
442                repeat = False
443
444        self.fixedbits = fixedbits
445        self.fixedmask = fixedmask
446        self.undefmask = undefmask
447
448    def build_tree(self):
449        for p in self.pats:
450            p.build_tree()
451
452    def prop_format(self):
453        for p in self.pats:
454            p.build_tree()
455
456    def prop_width(self):
457        width = None
458        for p in self.pats:
459            p.prop_width()
460            if width is None:
461                width = p.width
462            elif width != p.width:
463                error_with_file(self.file, self.lineno,
464                                'width mismatch in patterns within braces')
465        self.width = width
466
467# end MultiPattern
468
469
470class IncMultiPattern(MultiPattern):
471    """Class representing an overlapping set of instruction patterns"""
472
473    def output_code(self, i, extracted, outerbits, outermask):
474        global translate_prefix
475        ind = str_indent(i)
476        for p in self.pats:
477            if outermask != p.fixedmask:
478                innermask = p.fixedmask & ~outermask
479                innerbits = p.fixedbits & ~outermask
480                output(ind, 'if ((insn & ',
481                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
482                       ') {\n')
483                output(ind, '    /* ',
484                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
485                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
486                output(ind, '}\n')
487            else:
488                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
489#end IncMultiPattern
490
491
492class Tree:
493    """Class representing a node in a decode tree"""
494
495    def __init__(self, fm, tm):
496        self.fixedmask = fm
497        self.thismask = tm
498        self.subs = []
499        self.base = None
500
501    def str1(self, i):
502        ind = str_indent(i)
503        r = '{0}{1:08x}'.format(ind, self.fixedmask)
504        if self.format:
505            r += ' ' + self.format.name
506        r += ' [\n'
507        for (b, s) in self.subs:
508            r += '{0}  {1:08x}:\n'.format(ind, b)
509            r += s.str1(i + 4) + '\n'
510        r += ind + ']'
511        return r
512
513    def __str__(self):
514        return self.str1(0)
515
516    def output_code(self, i, extracted, outerbits, outermask):
517        ind = str_indent(i)
518
519        # If we identified all nodes below have the same format,
520        # extract the fields now.
521        if not extracted and self.base:
522            output(ind, self.base.extract_name(),
523                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
524            extracted = True
525
526        # Attempt to aid the compiler in producing compact switch statements.
527        # If the bits in the mask are contiguous, extract them.
528        sh = is_contiguous(self.thismask)
529        if sh > 0:
530            # Propagate SH down into the local functions.
531            def str_switch(b, sh=sh):
532                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
533
534            def str_case(b, sh=sh):
535                return '0x{0:x}'.format(b >> sh)
536        else:
537            def str_switch(b):
538                return 'insn & 0x{0:08x}'.format(b)
539
540            def str_case(b):
541                return '0x{0:08x}'.format(b)
542
543        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
544        for b, s in sorted(self.subs):
545            assert (self.thismask & ~s.fixedmask) == 0
546            innermask = outermask | self.thismask
547            innerbits = outerbits | b
548            output(ind, 'case ', str_case(b), ':\n')
549            output(ind, '    /* ',
550                   str_match_bits(innerbits, innermask), ' */\n')
551            s.output_code(i + 4, extracted, innerbits, innermask)
552            output(ind, '    break;\n')
553        output(ind, '}\n')
554# end Tree
555
556
557class ExcMultiPattern(MultiPattern):
558    """Class representing a non-overlapping set of instruction patterns"""
559
560    def output_code(self, i, extracted, outerbits, outermask):
561        # Defer everything to our decomposed Tree node
562        self.tree.output_code(i, extracted, outerbits, outermask)
563
564    @staticmethod
565    def __build_tree(pats, outerbits, outermask):
566        # Find the intersection of all remaining fixedmask.
567        innermask = ~outermask & insnmask
568        for i in pats:
569            innermask &= i.fixedmask
570
571        if innermask == 0:
572            # Edge condition: One pattern covers the entire insnmask
573            if len(pats) == 1:
574                t = Tree(outermask, innermask)
575                t.subs.append((0, pats[0]))
576                return t
577
578            text = 'overlapping patterns:'
579            for p in pats:
580                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
581            error_with_file(pats[0].file, pats[0].lineno, text)
582
583        fullmask = outermask | innermask
584
585        # Sort each element of pats into the bin selected by the mask.
586        bins = {}
587        for i in pats:
588            fb = i.fixedbits & innermask
589            if fb in bins:
590                bins[fb].append(i)
591            else:
592                bins[fb] = [i]
593
594        # We must recurse if any bin has more than one element or if
595        # the single element in the bin has not been fully matched.
596        t = Tree(fullmask, innermask)
597
598        for b, l in bins.items():
599            s = l[0]
600            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
601                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
602            t.subs.append((b, s))
603
604        return t
605
606    def build_tree(self):
607        super().prop_format()
608        self.tree = self.__build_tree(self.pats, self.fixedbits,
609                                      self.fixedmask)
610
611    @staticmethod
612    def __prop_format(tree):
613        """Propagate Format objects into the decode tree"""
614
615        # Depth first search.
616        for (b, s) in tree.subs:
617            if isinstance(s, Tree):
618                ExcMultiPattern.__prop_format(s)
619
620        # If all entries in SUBS have the same format, then
621        # propagate that into the tree.
622        f = None
623        for (b, s) in tree.subs:
624            if f is None:
625                f = s.base
626                if f is None:
627                    return
628            if f is not s.base:
629                return
630        tree.base = f
631
632    def prop_format(self):
633        super().prop_format()
634        self.__prop_format(self.tree)
635
636# end ExcMultiPattern
637
638
639def parse_field(lineno, name, toks):
640    """Parse one instruction field from TOKS at LINENO"""
641    global fields
642    global insnwidth
643
644    # A "simple" field will have only one entry;
645    # a "multifield" will have several.
646    subs = []
647    width = 0
648    func = None
649    for t in toks:
650        if re.match('^!function=', t):
651            if func:
652                error(lineno, 'duplicate function')
653            func = t.split('=')
654            func = func[1]
655            continue
656
657        if re.fullmatch('[0-9]+:s[0-9]+', t):
658            # Signed field extract
659            subtoks = t.split(':s')
660            sign = True
661        elif re.fullmatch('[0-9]+:[0-9]+', t):
662            # Unsigned field extract
663            subtoks = t.split(':')
664            sign = False
665        else:
666            error(lineno, 'invalid field token "{0}"'.format(t))
667        po = int(subtoks[0])
668        le = int(subtoks[1])
669        if po + le > insnwidth:
670            error(lineno, 'field {0} too large'.format(t))
671        f = Field(sign, po, le)
672        subs.append(f)
673        width += le
674
675    if width > insnwidth:
676        error(lineno, 'field too large')
677    if len(subs) == 0:
678        if func:
679            f = ParameterField(func)
680        else:
681            error(lineno, 'field with no value')
682    else:
683        if len(subs) == 1:
684            f = subs[0]
685        else:
686            mask = 0
687            for s in subs:
688                if mask & s.mask:
689                    error(lineno, 'field components overlap')
690                mask |= s.mask
691            f = MultiField(subs, mask)
692        if func:
693            f = FunctionField(func, f)
694
695    if name in fields:
696        error(lineno, 'duplicate field', name)
697    fields[name] = f
698# end parse_field
699
700
701def parse_arguments(lineno, name, toks):
702    """Parse one argument set from TOKS at LINENO"""
703    global arguments
704    global re_C_ident
705    global anyextern
706
707    flds = []
708    extern = False
709    for t in toks:
710        if re.fullmatch('!extern', t):
711            extern = True
712            anyextern = True
713            continue
714        if not re.fullmatch(re_C_ident, t):
715            error(lineno, 'invalid argument set token "{0}"'.format(t))
716        if t in flds:
717            error(lineno, 'duplicate argument "{0}"'.format(t))
718        flds.append(t)
719
720    if name in arguments:
721        error(lineno, 'duplicate argument set', name)
722    arguments[name] = Arguments(name, flds, extern)
723# end parse_arguments
724
725
726def lookup_field(lineno, name):
727    global fields
728    if name in fields:
729        return fields[name]
730    error(lineno, 'undefined field', name)
731
732
733def add_field(lineno, flds, new_name, f):
734    if new_name in flds:
735        error(lineno, 'duplicate field', new_name)
736    flds[new_name] = f
737    return flds
738
739
740def add_field_byname(lineno, flds, new_name, old_name):
741    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
742
743
744def infer_argument_set(flds):
745    global arguments
746    global decode_function
747
748    for arg in arguments.values():
749        if eq_fields_for_args(flds, arg.fields):
750            return arg
751
752    name = decode_function + str(len(arguments))
753    arg = Arguments(name, flds.keys(), False)
754    arguments[name] = arg
755    return arg
756
757
758def infer_format(arg, fieldmask, flds, width):
759    global arguments
760    global formats
761    global decode_function
762
763    const_flds = {}
764    var_flds = {}
765    for n, c in flds.items():
766        if c is ConstField:
767            const_flds[n] = c
768        else:
769            var_flds[n] = c
770
771    # Look for an existing format with the same argument set and fields
772    for fmt in formats.values():
773        if arg and fmt.base != arg:
774            continue
775        if fieldmask != fmt.fieldmask:
776            continue
777        if width != fmt.width:
778            continue
779        if not eq_fields_for_fmts(flds, fmt.fields):
780            continue
781        return (fmt, const_flds)
782
783    name = decode_function + '_Fmt_' + str(len(formats))
784    if not arg:
785        arg = infer_argument_set(flds)
786
787    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
788    formats[name] = fmt
789
790    return (fmt, const_flds)
791# end infer_format
792
793
794def parse_generic(lineno, parent_pat, name, toks):
795    """Parse one instruction format from TOKS at LINENO"""
796    global fields
797    global arguments
798    global formats
799    global allpatterns
800    global re_arg_ident
801    global re_fld_ident
802    global re_fmt_ident
803    global re_C_ident
804    global insnwidth
805    global insnmask
806    global variablewidth
807
808    is_format = parent_pat is None
809
810    fixedmask = 0
811    fixedbits = 0
812    undefmask = 0
813    width = 0
814    flds = {}
815    arg = None
816    fmt = None
817    for t in toks:
818        # '&Foo' gives a format an explicit argument set.
819        if re.fullmatch(re_arg_ident, t):
820            tt = t[1:]
821            if arg:
822                error(lineno, 'multiple argument sets')
823            if tt in arguments:
824                arg = arguments[tt]
825            else:
826                error(lineno, 'undefined argument set', t)
827            continue
828
829        # '@Foo' gives a pattern an explicit format.
830        if re.fullmatch(re_fmt_ident, t):
831            tt = t[1:]
832            if fmt:
833                error(lineno, 'multiple formats')
834            if tt in formats:
835                fmt = formats[tt]
836            else:
837                error(lineno, 'undefined format', t)
838            continue
839
840        # '%Foo' imports a field.
841        if re.fullmatch(re_fld_ident, t):
842            tt = t[1:]
843            flds = add_field_byname(lineno, flds, tt, tt)
844            continue
845
846        # 'Foo=%Bar' imports a field with a different name.
847        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
848            (fname, iname) = t.split('=%')
849            flds = add_field_byname(lineno, flds, fname, iname)
850            continue
851
852        # 'Foo=number' sets an argument field to a constant value
853        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
854            (fname, value) = t.split('=')
855            value = int(value)
856            flds = add_field(lineno, flds, fname, ConstField(value))
857            continue
858
859        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
860        # required ones, or dont-cares.
861        if re.fullmatch('[01.-]+', t):
862            shift = len(t)
863            fms = t.replace('0', '1')
864            fms = fms.replace('.', '0')
865            fms = fms.replace('-', '0')
866            fbs = t.replace('.', '0')
867            fbs = fbs.replace('-', '0')
868            ubm = t.replace('1', '0')
869            ubm = ubm.replace('.', '0')
870            ubm = ubm.replace('-', '1')
871            fms = int(fms, 2)
872            fbs = int(fbs, 2)
873            ubm = int(ubm, 2)
874            fixedbits = (fixedbits << shift) | fbs
875            fixedmask = (fixedmask << shift) | fms
876            undefmask = (undefmask << shift) | ubm
877        # Otherwise, fieldname:fieldwidth
878        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
879            (fname, flen) = t.split(':')
880            sign = False
881            if flen[0] == 's':
882                sign = True
883                flen = flen[1:]
884            shift = int(flen, 10)
885            if shift + width > insnwidth:
886                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
887            f = Field(sign, insnwidth - width - shift, shift)
888            flds = add_field(lineno, flds, fname, f)
889            fixedbits <<= shift
890            fixedmask <<= shift
891            undefmask <<= shift
892        else:
893            error(lineno, 'invalid token "{0}"'.format(t))
894        width += shift
895
896    if variablewidth and width < insnwidth and width % 8 == 0:
897        shift = insnwidth - width
898        fixedbits <<= shift
899        fixedmask <<= shift
900        undefmask <<= shift
901        undefmask |= (1 << shift) - 1
902
903    # We should have filled in all of the bits of the instruction.
904    elif not (is_format and width == 0) and width != insnwidth:
905        error(lineno, 'definition has {0} bits'.format(width))
906
907    # Do not check for fields overlapping fields; one valid usage
908    # is to be able to duplicate fields via import.
909    fieldmask = 0
910    for f in flds.values():
911        fieldmask |= f.mask
912
913    # Fix up what we've parsed to match either a format or a pattern.
914    if is_format:
915        # Formats cannot reference formats.
916        if fmt:
917            error(lineno, 'format referencing format')
918        # If an argument set is given, then there should be no fields
919        # without a place to store it.
920        if arg:
921            for f in flds.keys():
922                if f not in arg.fields:
923                    error(lineno, 'field {0} not in argument set {1}'
924                                  .format(f, arg.name))
925        else:
926            arg = infer_argument_set(flds)
927        if name in formats:
928            error(lineno, 'duplicate format name', name)
929        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
930                     undefmask, fieldmask, flds, width)
931        formats[name] = fmt
932    else:
933        # Patterns can reference a format ...
934        if fmt:
935            # ... but not an argument simultaneously
936            if arg:
937                error(lineno, 'pattern specifies both format and argument set')
938            if fixedmask & fmt.fixedmask:
939                error(lineno, 'pattern fixed bits overlap format fixed bits')
940            if width != fmt.width:
941                error(lineno, 'pattern uses format of different width')
942            fieldmask |= fmt.fieldmask
943            fixedbits |= fmt.fixedbits
944            fixedmask |= fmt.fixedmask
945            undefmask |= fmt.undefmask
946        else:
947            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
948        arg = fmt.base
949        for f in flds.keys():
950            if f not in arg.fields:
951                error(lineno, 'field {0} not in argument set {1}'
952                              .format(f, arg.name))
953            if f in fmt.fields.keys():
954                error(lineno, 'field {0} set by format and pattern'.format(f))
955        for f in arg.fields:
956            if f not in flds.keys() and f not in fmt.fields.keys():
957                error(lineno, 'field {0} not initialized'.format(f))
958        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
959                      undefmask, fieldmask, flds, width)
960        parent_pat.pats.append(pat)
961        allpatterns.append(pat)
962
963    # Validate the masks that we have assembled.
964    if fieldmask & fixedmask:
965        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
966                      .format(fieldmask, fixedmask))
967    if fieldmask & undefmask:
968        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
969                      .format(fieldmask, undefmask))
970    if fixedmask & undefmask:
971        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
972                      .format(fixedmask, undefmask))
973    if not is_format:
974        allbits = fieldmask | fixedmask | undefmask
975        if allbits != insnmask:
976            error(lineno, 'bits left unspecified (0x{0:08x})'
977                          .format(allbits ^ insnmask))
978# end parse_general
979
980
981def parse_file(f, parent_pat):
982    """Parse all of the patterns within a file"""
983    global re_arg_ident
984    global re_fld_ident
985    global re_fmt_ident
986    global re_pat_ident
987
988    # Read all of the lines of the file.  Concatenate lines
989    # ending in backslash; discard empty lines and comments.
990    toks = []
991    lineno = 0
992    nesting = 0
993    nesting_pats = []
994
995    for line in f:
996        lineno += 1
997
998        # Expand and strip spaces, to find indent.
999        line = line.rstrip()
1000        line = line.expandtabs()
1001        len1 = len(line)
1002        line = line.lstrip()
1003        len2 = len(line)
1004
1005        # Discard comments
1006        end = line.find('#')
1007        if end >= 0:
1008            line = line[:end]
1009
1010        t = line.split()
1011        if len(toks) != 0:
1012            # Next line after continuation
1013            toks.extend(t)
1014        else:
1015            # Allow completely blank lines.
1016            if len1 == 0:
1017                continue
1018            indent = len1 - len2
1019            # Empty line due to comment.
1020            if len(t) == 0:
1021                # Indentation must be correct, even for comment lines.
1022                if indent != nesting:
1023                    error(lineno, 'indentation ', indent, ' != ', nesting)
1024                continue
1025            start_lineno = lineno
1026            toks = t
1027
1028        # Continuation?
1029        if toks[-1] == '\\':
1030            toks.pop()
1031            continue
1032
1033        name = toks[0]
1034        del toks[0]
1035
1036        # End nesting?
1037        if name == '}' or name == ']':
1038            if len(toks) != 0:
1039                error(start_lineno, 'extra tokens after close brace')
1040
1041            # Make sure { } and [ ] nest properly.
1042            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
1043                error(lineno, 'mismatched close brace')
1044
1045            try:
1046                parent_pat = nesting_pats.pop()
1047            except:
1048                error(lineno, 'extra close brace')
1049
1050            nesting -= 2
1051            if indent != nesting:
1052                error(lineno, 'indentation ', indent, ' != ', nesting)
1053
1054            toks = []
1055            continue
1056
1057        # Everything else should have current indentation.
1058        if indent != nesting:
1059            error(start_lineno, 'indentation ', indent, ' != ', nesting)
1060
1061        # Start nesting?
1062        if name == '{' or name == '[':
1063            if len(toks) != 0:
1064                error(start_lineno, 'extra tokens after open brace')
1065
1066            if name == '{':
1067                nested_pat = IncMultiPattern(start_lineno)
1068            else:
1069                nested_pat = ExcMultiPattern(start_lineno)
1070            parent_pat.pats.append(nested_pat)
1071            nesting_pats.append(parent_pat)
1072            parent_pat = nested_pat
1073
1074            nesting += 2
1075            toks = []
1076            continue
1077
1078        # Determine the type of object needing to be parsed.
1079        if re.fullmatch(re_fld_ident, name):
1080            parse_field(start_lineno, name[1:], toks)
1081        elif re.fullmatch(re_arg_ident, name):
1082            parse_arguments(start_lineno, name[1:], toks)
1083        elif re.fullmatch(re_fmt_ident, name):
1084            parse_generic(start_lineno, None, name[1:], toks)
1085        elif re.fullmatch(re_pat_ident, name):
1086            parse_generic(start_lineno, parent_pat, name, toks)
1087        else:
1088            error(lineno, 'invalid token "{0}"'.format(name))
1089        toks = []
1090
1091    if nesting != 0:
1092        error(lineno, 'missing close brace')
1093# end parse_file
1094
1095
1096class SizeTree:
1097    """Class representing a node in a size decode tree"""
1098
1099    def __init__(self, m, w):
1100        self.mask = m
1101        self.subs = []
1102        self.base = None
1103        self.width = w
1104
1105    def str1(self, i):
1106        ind = str_indent(i)
1107        r = '{0}{1:08x}'.format(ind, self.mask)
1108        r += ' [\n'
1109        for (b, s) in self.subs:
1110            r += '{0}  {1:08x}:\n'.format(ind, b)
1111            r += s.str1(i + 4) + '\n'
1112        r += ind + ']'
1113        return r
1114
1115    def __str__(self):
1116        return self.str1(0)
1117
1118    def output_code(self, i, extracted, outerbits, outermask):
1119        ind = str_indent(i)
1120
1121        # If we need to load more bytes to test, do so now.
1122        if extracted < self.width:
1123            output(ind, 'insn = ', decode_function,
1124                   '_load_bytes(ctx, insn, {0}, {1});\n'
1125                   .format(extracted // 8, self.width // 8));
1126            extracted = self.width
1127
1128        # Attempt to aid the compiler in producing compact switch statements.
1129        # If the bits in the mask are contiguous, extract them.
1130        sh = is_contiguous(self.mask)
1131        if sh > 0:
1132            # Propagate SH down into the local functions.
1133            def str_switch(b, sh=sh):
1134                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
1135
1136            def str_case(b, sh=sh):
1137                return '0x{0:x}'.format(b >> sh)
1138        else:
1139            def str_switch(b):
1140                return 'insn & 0x{0:08x}'.format(b)
1141
1142            def str_case(b):
1143                return '0x{0:08x}'.format(b)
1144
1145        output(ind, 'switch (', str_switch(self.mask), ') {\n')
1146        for b, s in sorted(self.subs):
1147            innermask = outermask | self.mask
1148            innerbits = outerbits | b
1149            output(ind, 'case ', str_case(b), ':\n')
1150            output(ind, '    /* ',
1151                   str_match_bits(innerbits, innermask), ' */\n')
1152            s.output_code(i + 4, extracted, innerbits, innermask)
1153        output(ind, '}\n')
1154        output(ind, 'return insn;\n')
1155# end SizeTree
1156
1157class SizeLeaf:
1158    """Class representing a leaf node in a size decode tree"""
1159
1160    def __init__(self, m, w):
1161        self.mask = m
1162        self.width = w
1163
1164    def str1(self, i):
1165        ind = str_indent(i)
1166        return '{0}{1:08x}'.format(ind, self.mask)
1167
1168    def __str__(self):
1169        return self.str1(0)
1170
1171    def output_code(self, i, extracted, outerbits, outermask):
1172        global decode_function
1173        ind = str_indent(i)
1174
1175        # If we need to load more bytes, do so now.
1176        if extracted < self.width:
1177            output(ind, 'insn = ', decode_function,
1178                   '_load_bytes(ctx, insn, {0}, {1});\n'
1179                   .format(extracted // 8, self.width // 8));
1180            extracted = self.width
1181        output(ind, 'return insn;\n')
1182# end SizeLeaf
1183
1184
1185def build_size_tree(pats, width, outerbits, outermask):
1186    global insnwidth
1187
1188    # Collect the mask of bits that are fixed in this width
1189    innermask = 0xff << (insnwidth - width)
1190    innermask &= ~outermask
1191    minwidth = None
1192    onewidth = True
1193    for i in pats:
1194        innermask &= i.fixedmask
1195        if minwidth is None:
1196            minwidth = i.width
1197        elif minwidth != i.width:
1198            onewidth = False;
1199            if minwidth < i.width:
1200                minwidth = i.width
1201
1202    if onewidth:
1203        return SizeLeaf(innermask, minwidth)
1204
1205    if innermask == 0:
1206        if width < minwidth:
1207            return build_size_tree(pats, width + 8, outerbits, outermask)
1208
1209        pnames = []
1210        for p in pats:
1211            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
1212        error_with_file(pats[0].file, pats[0].lineno,
1213                        'overlapping patterns size {0}:'.format(width), pnames)
1214
1215    bins = {}
1216    for i in pats:
1217        fb = i.fixedbits & innermask
1218        if fb in bins:
1219            bins[fb].append(i)
1220        else:
1221            bins[fb] = [i]
1222
1223    fullmask = outermask | innermask
1224    lens = sorted(bins.keys())
1225    if len(lens) == 1:
1226        b = lens[0]
1227        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
1228
1229    r = SizeTree(innermask, width)
1230    for b, l in bins.items():
1231        s = build_size_tree(l, width, b | outerbits, fullmask)
1232        r.subs.append((b, s))
1233    return r
1234# end build_size_tree
1235
1236
1237def prop_size(tree):
1238    """Propagate minimum widths up the decode size tree"""
1239
1240    if isinstance(tree, SizeTree):
1241        min = None
1242        for (b, s) in tree.subs:
1243            width = prop_size(s)
1244            if min is None or min > width:
1245                min = width
1246        assert min >= tree.width
1247        tree.width = min
1248    else:
1249        min = tree.width
1250    return min
1251# end prop_size
1252
1253
1254def main():
1255    global arguments
1256    global formats
1257    global allpatterns
1258    global translate_scope
1259    global translate_prefix
1260    global output_fd
1261    global output_file
1262    global input_file
1263    global insnwidth
1264    global insntype
1265    global insnmask
1266    global decode_function
1267    global variablewidth
1268    global anyextern
1269
1270    decode_scope = 'static '
1271
1272    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
1273                 'static-decode=', 'varinsnwidth=']
1274    try:
1275        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
1276    except getopt.GetoptError as err:
1277        error(0, err)
1278    for o, a in opts:
1279        if o in ('-o', '--output'):
1280            output_file = a
1281        elif o == '--decode':
1282            decode_function = a
1283            decode_scope = ''
1284        elif o == '--static-decode':
1285            decode_function = a
1286        elif o == '--translate':
1287            translate_prefix = a
1288            translate_scope = ''
1289        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
1290            if o == '--varinsnwidth':
1291                variablewidth = True
1292            insnwidth = int(a)
1293            if insnwidth == 16:
1294                insntype = 'uint16_t'
1295                insnmask = 0xffff
1296            elif insnwidth != 32:
1297                error(0, 'cannot handle insns of width', insnwidth)
1298        else:
1299            assert False, 'unhandled option'
1300
1301    if len(args) < 1:
1302        error(0, 'missing input file')
1303
1304    toppat = ExcMultiPattern(0)
1305
1306    for filename in args:
1307        input_file = filename
1308        f = open(filename, 'rt', encoding='utf-8')
1309        parse_file(f, toppat)
1310        f.close()
1311
1312    # We do not want to compute masks for toppat, because those masks
1313    # are used as a starting point for build_tree.  For toppat, we must
1314    # insist that decode begins from naught.
1315    for i in toppat.pats:
1316        i.prop_masks()
1317
1318    toppat.build_tree()
1319    toppat.prop_format()
1320
1321    if variablewidth:
1322        for i in toppat.pats:
1323            i.prop_width()
1324        stree = build_size_tree(toppat.pats, 8, 0, 0)
1325        prop_size(stree)
1326
1327    if output_file:
1328        output_fd = open(output_file, 'wt', encoding='utf-8')
1329    else:
1330        output_fd = io.TextIOWrapper(sys.stdout.buffer,
1331                                     encoding=sys.stdout.encoding,
1332                                     errors="ignore")
1333
1334    output_autogen()
1335    for n in sorted(arguments.keys()):
1336        f = arguments[n]
1337        f.output_def()
1338
1339    # A single translate function can be invoked for different patterns.
1340    # Make sure that the argument sets are the same, and declare the
1341    # function only once.
1342    #
1343    # If we're sharing formats, we're likely also sharing trans_* functions,
1344    # but we can't tell which ones.  Prevent issues from the compiler by
1345    # suppressing redundant declaration warnings.
1346    if anyextern:
1347        output("#pragma GCC diagnostic push\n",
1348               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1349               "#ifdef __clang__\n"
1350               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1351               "#endif\n\n")
1352
1353    out_pats = {}
1354    for i in allpatterns:
1355        if i.name in out_pats:
1356            p = out_pats[i.name]
1357            if i.base.base != p.base.base:
1358                error(0, i.name, ' has conflicting argument sets')
1359        else:
1360            i.output_decl()
1361            out_pats[i.name] = i
1362    output('\n')
1363
1364    if anyextern:
1365        output("#pragma GCC diagnostic pop\n\n")
1366
1367    for n in sorted(formats.keys()):
1368        f = formats[n]
1369        f.output_extract()
1370
1371    output(decode_scope, 'bool ', decode_function,
1372           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
1373
1374    i4 = str_indent(4)
1375
1376    if len(allpatterns) != 0:
1377        output(i4, 'union {\n')
1378        for n in sorted(arguments.keys()):
1379            f = arguments[n]
1380            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
1381        output(i4, '} u;\n\n')
1382        toppat.output_code(4, False, 0, 0)
1383
1384    output(i4, 'return false;\n')
1385    output('}\n')
1386
1387    if variablewidth:
1388        output('\n', decode_scope, insntype, ' ', decode_function,
1389               '_load(DisasContext *ctx)\n{\n',
1390               '    ', insntype, ' insn = 0;\n\n')
1391        stree.output_code(4, 0, 0, 0)
1392        output('}\n')
1393
1394    if output_file:
1395        output_fd.close()
1396# end main
1397
1398
1399if __name__ == '__main__':
1400    main()
1401