xref: /qemu/scripts/decodetree.py (revision a81df1b6)
1#!/usr/bin/env python3
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20# See the syntax and semantics in docs/devel/decodetree.rst.
21#
22
23import os
24import re
25import sys
26import getopt
27
28insnwidth = 32
29insnmask = 0xffffffff
30variablewidth = False
31fields = {}
32arguments = {}
33formats = {}
34allpatterns = []
35anyextern = False
36
37translate_prefix = 'trans'
38translate_scope = 'static '
39input_file = ''
40output_file = None
41output_fd = None
42insntype = 'uint32_t'
43decode_function = 'decode'
44
45re_ident = '[a-zA-Z][a-zA-Z0-9_]*'
46
47
48def error_with_file(file, lineno, *args):
49    """Print an error message from file:line and args and exit."""
50    global output_file
51    global output_fd
52
53    prefix = ''
54    if file:
55        prefix += '{0}:'.format(file)
56    if lineno:
57        prefix += '{0}:'.format(lineno)
58    if prefix:
59        prefix += ' '
60    print(prefix, end='error: ', file=sys.stderr)
61    print(*args, file=sys.stderr)
62
63    if output_file and output_fd:
64        output_fd.close()
65        os.remove(output_file)
66    exit(1)
67# end error_with_file
68
69
70def error(lineno, *args):
71    error_with_file(input_file, lineno, *args)
72# end error
73
74
75def output(*args):
76    global output_fd
77    for a in args:
78        output_fd.write(a)
79
80
81def output_autogen():
82    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
83
84
85def str_indent(c):
86    """Return a string with C spaces"""
87    return ' ' * c
88
89
90def str_fields(fields):
91    """Return a string uniquely identifing FIELDS"""
92    r = ''
93    for n in sorted(fields.keys()):
94        r += '_' + n
95    return r[1:]
96
97
98def str_match_bits(bits, mask):
99    """Return a string pretty-printing BITS/MASK"""
100    global insnwidth
101
102    i = 1 << (insnwidth - 1)
103    space = 0x01010100
104    r = ''
105    while i != 0:
106        if i & mask:
107            if i & bits:
108                r += '1'
109            else:
110                r += '0'
111        else:
112            r += '.'
113        if i & space:
114            r += ' '
115        i >>= 1
116    return r
117
118
119def is_pow2(x):
120    """Return true iff X is equal to a power of 2."""
121    return (x & (x - 1)) == 0
122
123
124def ctz(x):
125    """Return the number of times 2 factors into X."""
126    assert x != 0
127    r = 0
128    while ((x >> r) & 1) == 0:
129        r += 1
130    return r
131
132
133def is_contiguous(bits):
134    if bits == 0:
135        return -1
136    shift = ctz(bits)
137    if is_pow2((bits >> shift) + 1):
138        return shift
139    else:
140        return -1
141
142
143def eq_fields_for_args(flds_a, flds_b):
144    if len(flds_a) != len(flds_b):
145        return False
146    for k, a in flds_a.items():
147        if k not in flds_b:
148            return False
149    return True
150
151
152def eq_fields_for_fmts(flds_a, flds_b):
153    if len(flds_a) != len(flds_b):
154        return False
155    for k, a in flds_a.items():
156        if k not in flds_b:
157            return False
158        b = flds_b[k]
159        if a.__class__ != b.__class__ or a != b:
160            return False
161    return True
162
163
164class Field:
165    """Class representing a simple instruction field"""
166    def __init__(self, sign, pos, len):
167        self.sign = sign
168        self.pos = pos
169        self.len = len
170        self.mask = ((1 << len) - 1) << pos
171
172    def __str__(self):
173        if self.sign:
174            s = 's'
175        else:
176            s = ''
177        return str(self.pos) + ':' + s + str(self.len)
178
179    def str_extract(self):
180        if self.sign:
181            extr = 'sextract32'
182        else:
183            extr = 'extract32'
184        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
185
186    def __eq__(self, other):
187        return self.sign == other.sign and self.mask == other.mask
188
189    def __ne__(self, other):
190        return not self.__eq__(other)
191# end Field
192
193
194class MultiField:
195    """Class representing a compound instruction field"""
196    def __init__(self, subs, mask):
197        self.subs = subs
198        self.sign = subs[0].sign
199        self.mask = mask
200
201    def __str__(self):
202        return str(self.subs)
203
204    def str_extract(self):
205        ret = '0'
206        pos = 0
207        for f in reversed(self.subs):
208            if pos == 0:
209                ret = f.str_extract()
210            else:
211                ret = 'deposit32({0}, {1}, {2}, {3})' \
212                      .format(ret, pos, 32 - pos, f.str_extract())
213            pos += f.len
214        return ret
215
216    def __ne__(self, other):
217        if len(self.subs) != len(other.subs):
218            return True
219        for a, b in zip(self.subs, other.subs):
220            if a.__class__ != b.__class__ or a != b:
221                return True
222        return False
223
224    def __eq__(self, other):
225        return not self.__ne__(other)
226# end MultiField
227
228
229class ConstField:
230    """Class representing an argument field with constant value"""
231    def __init__(self, value):
232        self.value = value
233        self.mask = 0
234        self.sign = value < 0
235
236    def __str__(self):
237        return str(self.value)
238
239    def str_extract(self):
240        return str(self.value)
241
242    def __cmp__(self, other):
243        return self.value - other.value
244# end ConstField
245
246
247class FunctionField:
248    """Class representing a field passed through a function"""
249    def __init__(self, func, base):
250        self.mask = base.mask
251        self.sign = base.sign
252        self.base = base
253        self.func = func
254
255    def __str__(self):
256        return self.func + '(' + str(self.base) + ')'
257
258    def str_extract(self):
259        return self.func + '(ctx, ' + self.base.str_extract() + ')'
260
261    def __eq__(self, other):
262        return self.func == other.func and self.base == other.base
263
264    def __ne__(self, other):
265        return not self.__eq__(other)
266# end FunctionField
267
268
269class ParameterField:
270    """Class representing a pseudo-field read from a function"""
271    def __init__(self, func):
272        self.mask = 0
273        self.sign = 0
274        self.func = func
275
276    def __str__(self):
277        return self.func
278
279    def str_extract(self):
280        return self.func + '(ctx)'
281
282    def __eq__(self, other):
283        return self.func == other.func
284
285    def __ne__(self, other):
286        return not self.__eq__(other)
287# end ParameterField
288
289
290class Arguments:
291    """Class representing the extracted fields of a format"""
292    def __init__(self, nm, flds, extern):
293        self.name = nm
294        self.extern = extern
295        self.fields = sorted(flds)
296
297    def __str__(self):
298        return self.name + ' ' + str(self.fields)
299
300    def struct_name(self):
301        return 'arg_' + self.name
302
303    def output_def(self):
304        if not self.extern:
305            output('typedef struct {\n')
306            for n in self.fields:
307                output('    int ', n, ';\n')
308            output('} ', self.struct_name(), ';\n\n')
309# end Arguments
310
311
312class General:
313    """Common code between instruction formats and instruction patterns"""
314    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
315        self.name = name
316        self.file = input_file
317        self.lineno = lineno
318        self.base = base
319        self.fixedbits = fixb
320        self.fixedmask = fixm
321        self.undefmask = udfm
322        self.fieldmask = fldm
323        self.fields = flds
324        self.width = w
325
326    def __str__(self):
327        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
328
329    def str1(self, i):
330        return str_indent(i) + self.__str__()
331# end General
332
333
334class Format(General):
335    """Class representing an instruction format"""
336
337    def extract_name(self):
338        global decode_function
339        return decode_function + '_extract_' + self.name
340
341    def output_extract(self):
342        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
343               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
344        for n, f in self.fields.items():
345            output('    a->', n, ' = ', f.str_extract(), ';\n')
346        output('}\n\n')
347# end Format
348
349
350class Pattern(General):
351    """Class representing an instruction pattern"""
352
353    def output_decl(self):
354        global translate_scope
355        global translate_prefix
356        output('typedef ', self.base.base.struct_name(),
357               ' arg_', self.name, ';\n')
358        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
359               '(DisasContext *ctx, arg_', self.name, ' *a);\n')
360
361    def output_code(self, i, extracted, outerbits, outermask):
362        global translate_prefix
363        ind = str_indent(i)
364        arg = self.base.base.name
365        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
366        if not extracted:
367            output(ind, self.base.extract_name(),
368                   '(ctx, &u.f_', arg, ', insn);\n')
369        for n, f in self.fields.items():
370            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
371        output(ind, 'if (', translate_prefix, '_', self.name,
372               '(ctx, &u.f_', arg, ')) return true;\n')
373
374    # Normal patterns do not have children.
375    def build_tree(self):
376        return
377    def prop_masks(self):
378        return
379    def prop_format(self):
380        return
381    def prop_width(self):
382        return
383
384# end Pattern
385
386
387class MultiPattern(General):
388    """Class representing a set of instruction patterns"""
389
390    def __init__(self, lineno):
391        self.file = input_file
392        self.lineno = lineno
393        self.pats = []
394        self.base = None
395        self.fixedbits = 0
396        self.fixedmask = 0
397        self.undefmask = 0
398        self.width = None
399
400    def __str__(self):
401        r = 'group'
402        if self.fixedbits is not None:
403            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
404        return r
405
406    def output_decl(self):
407        for p in self.pats:
408            p.output_decl()
409
410    def prop_masks(self):
411        global insnmask
412
413        fixedmask = insnmask
414        undefmask = insnmask
415
416        # Collect fixedmask/undefmask for all of the children.
417        for p in self.pats:
418            p.prop_masks()
419            fixedmask &= p.fixedmask
420            undefmask &= p.undefmask
421
422        # Widen fixedmask until all fixedbits match
423        repeat = True
424        fixedbits = 0
425        while repeat and fixedmask != 0:
426            fixedbits = None
427            for p in self.pats:
428                thisbits = p.fixedbits & fixedmask
429                if fixedbits is None:
430                    fixedbits = thisbits
431                elif fixedbits != thisbits:
432                    fixedmask &= ~(fixedbits ^ thisbits)
433                    break
434            else:
435                repeat = False
436
437        self.fixedbits = fixedbits
438        self.fixedmask = fixedmask
439        self.undefmask = undefmask
440
441    def build_tree(self):
442        for p in self.pats:
443            p.build_tree()
444
445    def prop_format(self):
446        for p in self.pats:
447            p.build_tree()
448
449    def prop_width(self):
450        width = None
451        for p in self.pats:
452            p.prop_width()
453            if width is None:
454                width = p.width
455            elif width != p.width:
456                error_with_file(self.file, self.lineno,
457                                'width mismatch in patterns within braces')
458        self.width = width
459
460# end MultiPattern
461
462
463class IncMultiPattern(MultiPattern):
464    """Class representing an overlapping set of instruction patterns"""
465
466    def output_code(self, i, extracted, outerbits, outermask):
467        global translate_prefix
468        ind = str_indent(i)
469        for p in self.pats:
470            if outermask != p.fixedmask:
471                innermask = p.fixedmask & ~outermask
472                innerbits = p.fixedbits & ~outermask
473                output(ind, 'if ((insn & ',
474                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
475                       ') {\n')
476                output(ind, '    /* ',
477                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
478                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
479                output(ind, '}\n')
480            else:
481                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
482#end IncMultiPattern
483
484
485class Tree:
486    """Class representing a node in a decode tree"""
487
488    def __init__(self, fm, tm):
489        self.fixedmask = fm
490        self.thismask = tm
491        self.subs = []
492        self.base = None
493
494    def str1(self, i):
495        ind = str_indent(i)
496        r = '{0}{1:08x}'.format(ind, self.fixedmask)
497        if self.format:
498            r += ' ' + self.format.name
499        r += ' [\n'
500        for (b, s) in self.subs:
501            r += '{0}  {1:08x}:\n'.format(ind, b)
502            r += s.str1(i + 4) + '\n'
503        r += ind + ']'
504        return r
505
506    def __str__(self):
507        return self.str1(0)
508
509    def output_code(self, i, extracted, outerbits, outermask):
510        ind = str_indent(i)
511
512        # If we identified all nodes below have the same format,
513        # extract the fields now.
514        if not extracted and self.base:
515            output(ind, self.base.extract_name(),
516                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
517            extracted = True
518
519        # Attempt to aid the compiler in producing compact switch statements.
520        # If the bits in the mask are contiguous, extract them.
521        sh = is_contiguous(self.thismask)
522        if sh > 0:
523            # Propagate SH down into the local functions.
524            def str_switch(b, sh=sh):
525                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
526
527            def str_case(b, sh=sh):
528                return '0x{0:x}'.format(b >> sh)
529        else:
530            def str_switch(b):
531                return 'insn & 0x{0:08x}'.format(b)
532
533            def str_case(b):
534                return '0x{0:08x}'.format(b)
535
536        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
537        for b, s in sorted(self.subs):
538            assert (self.thismask & ~s.fixedmask) == 0
539            innermask = outermask | self.thismask
540            innerbits = outerbits | b
541            output(ind, 'case ', str_case(b), ':\n')
542            output(ind, '    /* ',
543                   str_match_bits(innerbits, innermask), ' */\n')
544            s.output_code(i + 4, extracted, innerbits, innermask)
545            output(ind, '    return false;\n')
546        output(ind, '}\n')
547# end Tree
548
549
550class ExcMultiPattern(MultiPattern):
551    """Class representing a non-overlapping set of instruction patterns"""
552
553    def output_code(self, i, extracted, outerbits, outermask):
554        # Defer everything to our decomposed Tree node
555        self.tree.output_code(i, extracted, outerbits, outermask)
556
557    @staticmethod
558    def __build_tree(pats, outerbits, outermask):
559        # Find the intersection of all remaining fixedmask.
560        innermask = ~outermask & insnmask
561        for i in pats:
562            innermask &= i.fixedmask
563
564        if innermask == 0:
565            # Edge condition: One pattern covers the entire insnmask
566            if len(pats) == 1:
567                t = Tree(outermask, innermask)
568                t.subs.append((0, pats[0]))
569                return t
570
571            text = 'overlapping patterns:'
572            for p in pats:
573                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
574            error_with_file(pats[0].file, pats[0].lineno, text)
575
576        fullmask = outermask | innermask
577
578        # Sort each element of pats into the bin selected by the mask.
579        bins = {}
580        for i in pats:
581            fb = i.fixedbits & innermask
582            if fb in bins:
583                bins[fb].append(i)
584            else:
585                bins[fb] = [i]
586
587        # We must recurse if any bin has more than one element or if
588        # the single element in the bin has not been fully matched.
589        t = Tree(fullmask, innermask)
590
591        for b, l in bins.items():
592            s = l[0]
593            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
594                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
595            t.subs.append((b, s))
596
597        return t
598
599    def build_tree(self):
600        super().prop_format()
601        self.tree = self.__build_tree(self.pats, self.fixedbits,
602                                      self.fixedmask)
603
604    @staticmethod
605    def __prop_format(tree):
606        """Propagate Format objects into the decode tree"""
607
608        # Depth first search.
609        for (b, s) in tree.subs:
610            if isinstance(s, Tree):
611                ExcMultiPattern.__prop_format(s)
612
613        # If all entries in SUBS have the same format, then
614        # propagate that into the tree.
615        f = None
616        for (b, s) in tree.subs:
617            if f is None:
618                f = s.base
619                if f is None:
620                    return
621            if f is not s.base:
622                return
623        tree.base = f
624
625    def prop_format(self):
626        super().prop_format()
627        self.__prop_format(self.tree)
628
629# end ExcMultiPattern
630
631
632def parse_field(lineno, name, toks):
633    """Parse one instruction field from TOKS at LINENO"""
634    global fields
635    global re_ident
636    global insnwidth
637
638    # A "simple" field will have only one entry;
639    # a "multifield" will have several.
640    subs = []
641    width = 0
642    func = None
643    for t in toks:
644        if re.fullmatch('!function=' + re_ident, t):
645            if func:
646                error(lineno, 'duplicate function')
647            func = t.split('=')
648            func = func[1]
649            continue
650
651        if re.fullmatch('[0-9]+:s[0-9]+', t):
652            # Signed field extract
653            subtoks = t.split(':s')
654            sign = True
655        elif re.fullmatch('[0-9]+:[0-9]+', t):
656            # Unsigned field extract
657            subtoks = t.split(':')
658            sign = False
659        else:
660            error(lineno, 'invalid field token "{0}"'.format(t))
661        po = int(subtoks[0])
662        le = int(subtoks[1])
663        if po + le > insnwidth:
664            error(lineno, 'field {0} too large'.format(t))
665        f = Field(sign, po, le)
666        subs.append(f)
667        width += le
668
669    if width > insnwidth:
670        error(lineno, 'field too large')
671    if len(subs) == 0:
672        if func:
673            f = ParameterField(func)
674        else:
675            error(lineno, 'field with no value')
676    else:
677        if len(subs) == 1:
678            f = subs[0]
679        else:
680            mask = 0
681            for s in subs:
682                if mask & s.mask:
683                    error(lineno, 'field components overlap')
684                mask |= s.mask
685            f = MultiField(subs, mask)
686        if func:
687            f = FunctionField(func, f)
688
689    if name in fields:
690        error(lineno, 'duplicate field', name)
691    fields[name] = f
692# end parse_field
693
694
695def parse_arguments(lineno, name, toks):
696    """Parse one argument set from TOKS at LINENO"""
697    global arguments
698    global re_ident
699    global anyextern
700
701    flds = []
702    extern = False
703    for t in toks:
704        if re.fullmatch('!extern', t):
705            extern = True
706            anyextern = True
707            continue
708        if not re.fullmatch(re_ident, t):
709            error(lineno, 'invalid argument set token "{0}"'.format(t))
710        if t in flds:
711            error(lineno, 'duplicate argument "{0}"'.format(t))
712        flds.append(t)
713
714    if name in arguments:
715        error(lineno, 'duplicate argument set', name)
716    arguments[name] = Arguments(name, flds, extern)
717# end parse_arguments
718
719
720def lookup_field(lineno, name):
721    global fields
722    if name in fields:
723        return fields[name]
724    error(lineno, 'undefined field', name)
725
726
727def add_field(lineno, flds, new_name, f):
728    if new_name in flds:
729        error(lineno, 'duplicate field', new_name)
730    flds[new_name] = f
731    return flds
732
733
734def add_field_byname(lineno, flds, new_name, old_name):
735    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
736
737
738def infer_argument_set(flds):
739    global arguments
740    global decode_function
741
742    for arg in arguments.values():
743        if eq_fields_for_args(flds, arg.fields):
744            return arg
745
746    name = decode_function + str(len(arguments))
747    arg = Arguments(name, flds.keys(), False)
748    arguments[name] = arg
749    return arg
750
751
752def infer_format(arg, fieldmask, flds, width):
753    global arguments
754    global formats
755    global decode_function
756
757    const_flds = {}
758    var_flds = {}
759    for n, c in flds.items():
760        if c is ConstField:
761            const_flds[n] = c
762        else:
763            var_flds[n] = c
764
765    # Look for an existing format with the same argument set and fields
766    for fmt in formats.values():
767        if arg and fmt.base != arg:
768            continue
769        if fieldmask != fmt.fieldmask:
770            continue
771        if width != fmt.width:
772            continue
773        if not eq_fields_for_fmts(flds, fmt.fields):
774            continue
775        return (fmt, const_flds)
776
777    name = decode_function + '_Fmt_' + str(len(formats))
778    if not arg:
779        arg = infer_argument_set(flds)
780
781    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
782    formats[name] = fmt
783
784    return (fmt, const_flds)
785# end infer_format
786
787
788def parse_generic(lineno, parent_pat, name, toks):
789    """Parse one instruction format from TOKS at LINENO"""
790    global fields
791    global arguments
792    global formats
793    global allpatterns
794    global re_ident
795    global insnwidth
796    global insnmask
797    global variablewidth
798
799    is_format = parent_pat is None
800
801    fixedmask = 0
802    fixedbits = 0
803    undefmask = 0
804    width = 0
805    flds = {}
806    arg = None
807    fmt = None
808    for t in toks:
809        # '&Foo' gives a format an explcit argument set.
810        if t[0] == '&':
811            tt = t[1:]
812            if arg:
813                error(lineno, 'multiple argument sets')
814            if tt in arguments:
815                arg = arguments[tt]
816            else:
817                error(lineno, 'undefined argument set', t)
818            continue
819
820        # '@Foo' gives a pattern an explicit format.
821        if t[0] == '@':
822            tt = t[1:]
823            if fmt:
824                error(lineno, 'multiple formats')
825            if tt in formats:
826                fmt = formats[tt]
827            else:
828                error(lineno, 'undefined format', t)
829            continue
830
831        # '%Foo' imports a field.
832        if t[0] == '%':
833            tt = t[1:]
834            flds = add_field_byname(lineno, flds, tt, tt)
835            continue
836
837        # 'Foo=%Bar' imports a field with a different name.
838        if re.fullmatch(re_ident + '=%' + re_ident, t):
839            (fname, iname) = t.split('=%')
840            flds = add_field_byname(lineno, flds, fname, iname)
841            continue
842
843        # 'Foo=number' sets an argument field to a constant value
844        if re.fullmatch(re_ident + '=[+-]?[0-9]+', t):
845            (fname, value) = t.split('=')
846            value = int(value)
847            flds = add_field(lineno, flds, fname, ConstField(value))
848            continue
849
850        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
851        # required ones, or dont-cares.
852        if re.fullmatch('[01.-]+', t):
853            shift = len(t)
854            fms = t.replace('0', '1')
855            fms = fms.replace('.', '0')
856            fms = fms.replace('-', '0')
857            fbs = t.replace('.', '0')
858            fbs = fbs.replace('-', '0')
859            ubm = t.replace('1', '0')
860            ubm = ubm.replace('.', '0')
861            ubm = ubm.replace('-', '1')
862            fms = int(fms, 2)
863            fbs = int(fbs, 2)
864            ubm = int(ubm, 2)
865            fixedbits = (fixedbits << shift) | fbs
866            fixedmask = (fixedmask << shift) | fms
867            undefmask = (undefmask << shift) | ubm
868        # Otherwise, fieldname:fieldwidth
869        elif re.fullmatch(re_ident + ':s?[0-9]+', t):
870            (fname, flen) = t.split(':')
871            sign = False
872            if flen[0] == 's':
873                sign = True
874                flen = flen[1:]
875            shift = int(flen, 10)
876            if shift + width > insnwidth:
877                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
878            f = Field(sign, insnwidth - width - shift, shift)
879            flds = add_field(lineno, flds, fname, f)
880            fixedbits <<= shift
881            fixedmask <<= shift
882            undefmask <<= shift
883        else:
884            error(lineno, 'invalid token "{0}"'.format(t))
885        width += shift
886
887    if variablewidth and width < insnwidth and width % 8 == 0:
888        shift = insnwidth - width
889        fixedbits <<= shift
890        fixedmask <<= shift
891        undefmask <<= shift
892        undefmask |= (1 << shift) - 1
893
894    # We should have filled in all of the bits of the instruction.
895    elif not (is_format and width == 0) and width != insnwidth:
896        error(lineno, 'definition has {0} bits'.format(width))
897
898    # Do not check for fields overlaping fields; one valid usage
899    # is to be able to duplicate fields via import.
900    fieldmask = 0
901    for f in flds.values():
902        fieldmask |= f.mask
903
904    # Fix up what we've parsed to match either a format or a pattern.
905    if is_format:
906        # Formats cannot reference formats.
907        if fmt:
908            error(lineno, 'format referencing format')
909        # If an argument set is given, then there should be no fields
910        # without a place to store it.
911        if arg:
912            for f in flds.keys():
913                if f not in arg.fields:
914                    error(lineno, 'field {0} not in argument set {1}'
915                                  .format(f, arg.name))
916        else:
917            arg = infer_argument_set(flds)
918        if name in formats:
919            error(lineno, 'duplicate format name', name)
920        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
921                     undefmask, fieldmask, flds, width)
922        formats[name] = fmt
923    else:
924        # Patterns can reference a format ...
925        if fmt:
926            # ... but not an argument simultaneously
927            if arg:
928                error(lineno, 'pattern specifies both format and argument set')
929            if fixedmask & fmt.fixedmask:
930                error(lineno, 'pattern fixed bits overlap format fixed bits')
931            if width != fmt.width:
932                error(lineno, 'pattern uses format of different width')
933            fieldmask |= fmt.fieldmask
934            fixedbits |= fmt.fixedbits
935            fixedmask |= fmt.fixedmask
936            undefmask |= fmt.undefmask
937        else:
938            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
939        arg = fmt.base
940        for f in flds.keys():
941            if f not in arg.fields:
942                error(lineno, 'field {0} not in argument set {1}'
943                              .format(f, arg.name))
944            if f in fmt.fields.keys():
945                error(lineno, 'field {0} set by format and pattern'.format(f))
946        for f in arg.fields:
947            if f not in flds.keys() and f not in fmt.fields.keys():
948                error(lineno, 'field {0} not initialized'.format(f))
949        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
950                      undefmask, fieldmask, flds, width)
951        parent_pat.pats.append(pat)
952        allpatterns.append(pat)
953
954    # Validate the masks that we have assembled.
955    if fieldmask & fixedmask:
956        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
957                      .format(fieldmask, fixedmask))
958    if fieldmask & undefmask:
959        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
960                      .format(fieldmask, undefmask))
961    if fixedmask & undefmask:
962        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
963                      .format(fixedmask, undefmask))
964    if not is_format:
965        allbits = fieldmask | fixedmask | undefmask
966        if allbits != insnmask:
967            error(lineno, 'bits left unspecified (0x{0:08x})'
968                          .format(allbits ^ insnmask))
969# end parse_general
970
971
972def parse_file(f, parent_pat):
973    """Parse all of the patterns within a file"""
974
975    # Read all of the lines of the file.  Concatenate lines
976    # ending in backslash; discard empty lines and comments.
977    toks = []
978    lineno = 0
979    nesting = 0
980    nesting_pats = []
981
982    for line in f:
983        lineno += 1
984
985        # Expand and strip spaces, to find indent.
986        line = line.rstrip()
987        line = line.expandtabs()
988        len1 = len(line)
989        line = line.lstrip()
990        len2 = len(line)
991
992        # Discard comments
993        end = line.find('#')
994        if end >= 0:
995            line = line[:end]
996
997        t = line.split()
998        if len(toks) != 0:
999            # Next line after continuation
1000            toks.extend(t)
1001        else:
1002            # Allow completely blank lines.
1003            if len1 == 0:
1004                continue
1005            indent = len1 - len2
1006            # Empty line due to comment.
1007            if len(t) == 0:
1008                # Indentation must be correct, even for comment lines.
1009                if indent != nesting:
1010                    error(lineno, 'indentation ', indent, ' != ', nesting)
1011                continue
1012            start_lineno = lineno
1013            toks = t
1014
1015        # Continuation?
1016        if toks[-1] == '\\':
1017            toks.pop()
1018            continue
1019
1020        name = toks[0]
1021        del toks[0]
1022
1023        # End nesting?
1024        if name == '}' or name == ']':
1025            if len(toks) != 0:
1026                error(start_lineno, 'extra tokens after close brace')
1027
1028            # Make sure { } and [ ] nest properly.
1029            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
1030                error(lineno, 'mismatched close brace')
1031
1032            try:
1033                parent_pat = nesting_pats.pop()
1034            except:
1035                error(lineno, 'extra close brace')
1036
1037            nesting -= 2
1038            if indent != nesting:
1039                error(lineno, 'indentation ', indent, ' != ', nesting)
1040
1041            toks = []
1042            continue
1043
1044        # Everything else should have current indentation.
1045        if indent != nesting:
1046            error(start_lineno, 'indentation ', indent, ' != ', nesting)
1047
1048        # Start nesting?
1049        if name == '{' or name == '[':
1050            if len(toks) != 0:
1051                error(start_lineno, 'extra tokens after open brace')
1052
1053            if name == '{':
1054                nested_pat = IncMultiPattern(start_lineno)
1055            else:
1056                nested_pat = ExcMultiPattern(start_lineno)
1057            parent_pat.pats.append(nested_pat)
1058            nesting_pats.append(parent_pat)
1059            parent_pat = nested_pat
1060
1061            nesting += 2
1062            toks = []
1063            continue
1064
1065        # Determine the type of object needing to be parsed.
1066        if name[0] == '%':
1067            parse_field(start_lineno, name[1:], toks)
1068        elif name[0] == '&':
1069            parse_arguments(start_lineno, name[1:], toks)
1070        elif name[0] == '@':
1071            parse_generic(start_lineno, None, name[1:], toks)
1072        else:
1073            parse_generic(start_lineno, parent_pat, name, toks)
1074        toks = []
1075
1076    if nesting != 0:
1077        error(lineno, 'missing close brace')
1078# end parse_file
1079
1080
1081class SizeTree:
1082    """Class representing a node in a size decode tree"""
1083
1084    def __init__(self, m, w):
1085        self.mask = m
1086        self.subs = []
1087        self.base = None
1088        self.width = w
1089
1090    def str1(self, i):
1091        ind = str_indent(i)
1092        r = '{0}{1:08x}'.format(ind, self.mask)
1093        r += ' [\n'
1094        for (b, s) in self.subs:
1095            r += '{0}  {1:08x}:\n'.format(ind, b)
1096            r += s.str1(i + 4) + '\n'
1097        r += ind + ']'
1098        return r
1099
1100    def __str__(self):
1101        return self.str1(0)
1102
1103    def output_code(self, i, extracted, outerbits, outermask):
1104        ind = str_indent(i)
1105
1106        # If we need to load more bytes to test, do so now.
1107        if extracted < self.width:
1108            output(ind, 'insn = ', decode_function,
1109                   '_load_bytes(ctx, insn, {0}, {1});\n'
1110                   .format(extracted // 8, self.width // 8));
1111            extracted = self.width
1112
1113        # Attempt to aid the compiler in producing compact switch statements.
1114        # If the bits in the mask are contiguous, extract them.
1115        sh = is_contiguous(self.mask)
1116        if sh > 0:
1117            # Propagate SH down into the local functions.
1118            def str_switch(b, sh=sh):
1119                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
1120
1121            def str_case(b, sh=sh):
1122                return '0x{0:x}'.format(b >> sh)
1123        else:
1124            def str_switch(b):
1125                return 'insn & 0x{0:08x}'.format(b)
1126
1127            def str_case(b):
1128                return '0x{0:08x}'.format(b)
1129
1130        output(ind, 'switch (', str_switch(self.mask), ') {\n')
1131        for b, s in sorted(self.subs):
1132            innermask = outermask | self.mask
1133            innerbits = outerbits | b
1134            output(ind, 'case ', str_case(b), ':\n')
1135            output(ind, '    /* ',
1136                   str_match_bits(innerbits, innermask), ' */\n')
1137            s.output_code(i + 4, extracted, innerbits, innermask)
1138        output(ind, '}\n')
1139        output(ind, 'return insn;\n')
1140# end SizeTree
1141
1142class SizeLeaf:
1143    """Class representing a leaf node in a size decode tree"""
1144
1145    def __init__(self, m, w):
1146        self.mask = m
1147        self.width = w
1148
1149    def str1(self, i):
1150        ind = str_indent(i)
1151        return '{0}{1:08x}'.format(ind, self.mask)
1152
1153    def __str__(self):
1154        return self.str1(0)
1155
1156    def output_code(self, i, extracted, outerbits, outermask):
1157        global decode_function
1158        ind = str_indent(i)
1159
1160        # If we need to load more bytes, do so now.
1161        if extracted < self.width:
1162            output(ind, 'insn = ', decode_function,
1163                   '_load_bytes(ctx, insn, {0}, {1});\n'
1164                   .format(extracted // 8, self.width // 8));
1165            extracted = self.width
1166        output(ind, 'return insn;\n')
1167# end SizeLeaf
1168
1169
1170def build_size_tree(pats, width, outerbits, outermask):
1171    global insnwidth
1172
1173    # Collect the mask of bits that are fixed in this width
1174    innermask = 0xff << (insnwidth - width)
1175    innermask &= ~outermask
1176    minwidth = None
1177    onewidth = True
1178    for i in pats:
1179        innermask &= i.fixedmask
1180        if minwidth is None:
1181            minwidth = i.width
1182        elif minwidth != i.width:
1183            onewidth = False;
1184            if minwidth < i.width:
1185                minwidth = i.width
1186
1187    if onewidth:
1188        return SizeLeaf(innermask, minwidth)
1189
1190    if innermask == 0:
1191        if width < minwidth:
1192            return build_size_tree(pats, width + 8, outerbits, outermask)
1193
1194        pnames = []
1195        for p in pats:
1196            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
1197        error_with_file(pats[0].file, pats[0].lineno,
1198                        'overlapping patterns size {0}:'.format(width), pnames)
1199
1200    bins = {}
1201    for i in pats:
1202        fb = i.fixedbits & innermask
1203        if fb in bins:
1204            bins[fb].append(i)
1205        else:
1206            bins[fb] = [i]
1207
1208    fullmask = outermask | innermask
1209    lens = sorted(bins.keys())
1210    if len(lens) == 1:
1211        b = lens[0]
1212        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
1213
1214    r = SizeTree(innermask, width)
1215    for b, l in bins.items():
1216        s = build_size_tree(l, width, b | outerbits, fullmask)
1217        r.subs.append((b, s))
1218    return r
1219# end build_size_tree
1220
1221
1222def prop_size(tree):
1223    """Propagate minimum widths up the decode size tree"""
1224
1225    if isinstance(tree, SizeTree):
1226        min = None
1227        for (b, s) in tree.subs:
1228            width = prop_size(s)
1229            if min is None or min > width:
1230                min = width
1231        assert min >= tree.width
1232        tree.width = min
1233    else:
1234        min = tree.width
1235    return min
1236# end prop_size
1237
1238
1239def main():
1240    global arguments
1241    global formats
1242    global allpatterns
1243    global translate_scope
1244    global translate_prefix
1245    global output_fd
1246    global output_file
1247    global input_file
1248    global insnwidth
1249    global insntype
1250    global insnmask
1251    global decode_function
1252    global variablewidth
1253    global anyextern
1254
1255    decode_scope = 'static '
1256
1257    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
1258                 'static-decode=', 'varinsnwidth=']
1259    try:
1260        (opts, args) = getopt.getopt(sys.argv[1:], 'o:vw:', long_opts)
1261    except getopt.GetoptError as err:
1262        error(0, err)
1263    for o, a in opts:
1264        if o in ('-o', '--output'):
1265            output_file = a
1266        elif o == '--decode':
1267            decode_function = a
1268            decode_scope = ''
1269        elif o == '--static-decode':
1270            decode_function = a
1271        elif o == '--translate':
1272            translate_prefix = a
1273            translate_scope = ''
1274        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
1275            if o == '--varinsnwidth':
1276                variablewidth = True
1277            insnwidth = int(a)
1278            if insnwidth == 16:
1279                insntype = 'uint16_t'
1280                insnmask = 0xffff
1281            elif insnwidth != 32:
1282                error(0, 'cannot handle insns of width', insnwidth)
1283        else:
1284            assert False, 'unhandled option'
1285
1286    if len(args) < 1:
1287        error(0, 'missing input file')
1288
1289    toppat = ExcMultiPattern(0)
1290
1291    for filename in args:
1292        input_file = filename
1293        f = open(filename, 'r')
1294        parse_file(f, toppat)
1295        f.close()
1296
1297    # We do not want to compute masks for toppat, because those masks
1298    # are used as a starting point for build_tree.  For toppat, we must
1299    # insist that decode begins from naught.
1300    for i in toppat.pats:
1301        i.prop_masks()
1302
1303    toppat.build_tree()
1304    toppat.prop_format()
1305
1306    if variablewidth:
1307        for i in toppat.pats:
1308            i.prop_width()
1309        stree = build_size_tree(toppat.pats, 8, 0, 0)
1310        prop_size(stree)
1311
1312    if output_file:
1313        output_fd = open(output_file, 'w')
1314    else:
1315        output_fd = sys.stdout
1316
1317    output_autogen()
1318    for n in sorted(arguments.keys()):
1319        f = arguments[n]
1320        f.output_def()
1321
1322    # A single translate function can be invoked for different patterns.
1323    # Make sure that the argument sets are the same, and declare the
1324    # function only once.
1325    #
1326    # If we're sharing formats, we're likely also sharing trans_* functions,
1327    # but we can't tell which ones.  Prevent issues from the compiler by
1328    # suppressing redundant declaration warnings.
1329    if anyextern:
1330        output("#pragma GCC diagnostic push\n",
1331               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1332               "#ifdef __clang__\n"
1333               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1334               "#endif\n\n")
1335
1336    out_pats = {}
1337    for i in allpatterns:
1338        if i.name in out_pats:
1339            p = out_pats[i.name]
1340            if i.base.base != p.base.base:
1341                error(0, i.name, ' has conflicting argument sets')
1342        else:
1343            i.output_decl()
1344            out_pats[i.name] = i
1345    output('\n')
1346
1347    if anyextern:
1348        output("#pragma GCC diagnostic pop\n\n")
1349
1350    for n in sorted(formats.keys()):
1351        f = formats[n]
1352        f.output_extract()
1353
1354    output(decode_scope, 'bool ', decode_function,
1355           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
1356
1357    i4 = str_indent(4)
1358
1359    if len(allpatterns) != 0:
1360        output(i4, 'union {\n')
1361        for n in sorted(arguments.keys()):
1362            f = arguments[n]
1363            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
1364        output(i4, '} u;\n\n')
1365        toppat.output_code(4, False, 0, 0)
1366
1367    output(i4, 'return false;\n')
1368    output('}\n')
1369
1370    if variablewidth:
1371        output('\n', decode_scope, insntype, ' ', decode_function,
1372               '_load(DisasContext *ctx)\n{\n',
1373               '    ', insntype, ' insn = 0;\n\n')
1374        stree.output_code(4, 0, 0, 0)
1375        output('}\n')
1376
1377    if output_file:
1378        output_fd.close()
1379# end main
1380
1381
1382if __name__ == '__main__':
1383    main()
1384