xref: /qemu/scripts/decodetree.py (revision 8063396b)
1#!/usr/bin/env python3
2# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
20# See the syntax and semantics in docs/devel/decodetree.rst.
21#
22
23import os
24import re
25import sys
26import getopt
27
# Width of one instruction in bits; bounds field positions in parse_field
# and the total definition width in parse_generic.
insnwidth = 32
# Mask with one bit set for every bit of an instruction.
insnmask = 0xffffffff
# When true, parse_generic accepts definitions narrower than insnwidth
# as long as their width is a multiple of 8 bits.
variablewidth = False
# Named fields (name -> field object), filled in by parse_field.
fields = {}
# Named argument sets (name -> Arguments), filled in by parse_arguments
# and infer_argument_set.
arguments = {}
# Named formats (name -> Format), filled in by parse_generic and
# infer_format.
formats = {}
# Every Pattern created by parse_generic, in creation order.
allpatterns = []
# Set to True as soon as one argument set is declared with !extern.
anyextern = False

# Prefix of the per-pattern translator function names in the output.
translate_prefix = 'trans'
# Text emitted in front of each translator function declaration.
translate_scope = 'static '
# Name of the file being parsed; used for error messages and recorded
# in every format and pattern.
input_file = ''
# Path of the generated file; error_with_file removes it on failure.
output_file = None
# File object that output() writes to.
output_fd = None
# C type of the 'insn' parameter of the generated extract functions.
insntype = 'uint32_t'
# Base name for the generated extract functions and for the names of
# inferred argument sets and formats.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
53
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO are each left out of the message prefix when they
    are false.  ARGS are printed after 'error: ' the way print() would
    print them.  If an output file has been opened it is closed and
    removed, so that no partial result is left behind.

    Does not return: the process terminates with exit status 1.
    """
    prefix = ''
    if file:
        prefix += '{0}:'.format(file)
    if lineno:
        prefix += '{0}:'.format(lineno)
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # The bare exit() builtin is installed by the 'site' module for
    # interactive use and may be missing (e.g. 'python -S'); sys.exit
    # is always available.
    sys.exit(1)
# end error_with_file
74
75
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit.

    Forwards to error_with_file using the global input_file.
    """
    error_with_file(input_file, lineno, *args)
# end error
79
80
def output(*args):
    """Write each string in ARGS, in order, to the output file."""
    for text in args:
        output_fd.write(text)
85
86
def output_autogen():
    """Emit the C comment that marks the output file as autogenerated."""
    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
89
90
def str_indent(c):
    """Return a string consisting of C space characters."""
    return c * ' '
94
95
def str_fields(fields):
    """Return a string uniquely identifying FIELDS.

    The result is the sorted field names joined with underscores.
    """
    return '_'.join(sorted(fields.keys()))
102
103
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK.

    The bits are listed from bit insnwidth - 1 down to bit 0.  A bit
    selected by MASK is shown as its value in BITS ('1' or '0'); any
    other bit is shown as '.'.  A space follows bits 24, 16 and 8.
    """
    global insnwidth

    # Bits after which a separating space is inserted.
    separators = 0x01010100
    chars = []
    probe = 1 << (insnwidth - 1)
    while probe != 0:
        if not (probe & mask):
            chars.append('.')
        elif probe & bits:
            chars.append('1')
        else:
            chars.append('0')
        if probe & separators:
            chars.append(' ')
        probe >>= 1
    return ''.join(chars)
123
124
def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Zero and negative values are not powers of two and yield False.
    """
    # x & (x - 1) clears the lowest set bit; it is also 0 for x == 0,
    # which therefore has to be excluded explicitly.
    return x > 0 and (x & (x - 1)) == 0
128
129
def ctz(x):
    """Return the number of times 2 factors into X.

    This is the count of trailing zero bits of X, which must be non-zero.
    """
    assert x != 0
    # x & -x keeps only the lowest set bit of x.
    lowest = x & -x
    return lowest.bit_length() - 1
137
138
def is_contiguous(bits):
    """Return the shift of a contiguous run of set bits, or -1.

    When the set bits of BITS form a single unbroken run, the position
    of the lowest set bit is returned.  -1 is returned when BITS is
    zero or when its set bits have gaps.
    """
    if bits == 0:
        return -1
    lowest = ctz(bits)
    # An unbroken run moved down to bit 0 is one less than a power of 2.
    return lowest if is_pow2((bits >> lowest) + 1) else -1
147
148
def eq_fields_for_args(flds_a, flds_b):
    """Return true iff FLDS_A and FLDS_B hold the same field names.

    Only the names are compared; the values of FLDS_A are ignored.
    """
    if len(flds_a) != len(flds_b):
        return False
    return all(name in flds_b for name, _ in flds_a.items())
156
157
def eq_fields_for_fmts(flds_a, flds_b):
    """Return true iff FLDS_A and FLDS_B describe identical fields.

    Both must have the same names, and each pair of same-named values
    must be of the same class and compare equal.
    """
    def mismatch(key, lhs):
        if key not in flds_b:
            return True
        rhs = flds_b[key]
        return lhs.__class__ != rhs.__class__ or lhs != rhs

    if len(flds_a) != len(flds_b):
        return False
    return not any(mismatch(k, v) for k, v in flds_a.items())
168
169
class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        # SIGN selects the signed extract helper, POS is the position of
        # the lowest bit and LEN the number of bits.
        self.sign, self.pos, self.len = sign, pos, len
        # LEN one-bits, moved up to start at bit POS.
        low_ones = (1 << len) - 1
        self.mask = low_ones << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return ''.join((str(self.pos), ':', marker, str(self.len)))

    def str_extract(self):
        """Return the C expression that extracts this field from insn."""
        helper = 'sextract32' if self.sign else 'extract32'
        return '{0}(insn, {1}, {2})'.format(helper, self.pos, self.len)

    def __eq__(self, other):
        # Two fields are equal when signedness and covered bits agree.
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
198
199
class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        # SUBS is the non-empty list of component fields; MASK is the
        # union of the instruction bits they cover.
        self.subs = subs
        # The compound field takes its signedness from the first component.
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        # str() of a list shows the repr() of its elements, which for
        # the field classes is only an object address; format each
        # component with str() instead.
        return '[' + ', '.join(str(s) for s in self.subs) + ']'

    def str_extract(self):
        """Return the C expression that assembles the compound field.

        The last component is extracted first; every earlier component
        is then wrapped around the result with deposit32 at the bit
        offset accumulated from the lengths of the components after it.
        """
        ret = '0'
        pos = 0
        for f in reversed(self.subs):
            if pos == 0:
                ret = f.str_extract()
            else:
                ret = 'deposit32({0}, {1}, {2}, {3})' \
                      .format(ret, pos, 32 - pos, f.str_extract())
            pos += f.len
        return ret

    def __ne__(self, other):
        # Unequal when the component counts differ or when any pair of
        # components differs in class or value.
        if len(self.subs) != len(other.subs):
            return True
        for a, b in zip(self.subs, other.subs):
            if a.__class__ != b.__class__ or a != b:
                return True
        return False

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField
233
234
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant occupies no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for the field: the constant itself."""
        return str(self.value)

    # Python 3 never calls __cmp__, so without the rich comparisons
    # below two constants with the same value would compare by identity
    # and always be reported as different.
    def __eq__(self, other):
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # Defining __eq__ would otherwise make instances unhashable.
    def __hash__(self):
        return hash(self.value)
# end ConstField
251
252
class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        # FUNC is the name of the function; BASE is the wrapped field,
        # whose mask and signedness are taken over unchanged.
        self.func = func
        self.base = base
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return ''.join((self.func, '(', str(self.base), ')'))

    def str_extract(self):
        """Return the C expression: FUNC applied to ctx and the base value."""
        return ''.join((self.func, '(ctx, ', self.base.str_extract(), ')'))

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
273
274
class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        # FUNC is the name of the function supplying the value; no
        # instruction bits are involved, hence the zero mask.
        self.func = func
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C expression: a call of FUNC with ctx only."""
        return ''.join((self.func, '(ctx)'))

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
294
295
class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, extern):
        self.name = nm
        self.extern = extern
        # Member names, kept as a sorted list.
        members = list(flds)
        members.sort()
        self.fields = members

    def __str__(self):
        return ' '.join((self.name, str(self.fields)))

    def struct_name(self):
        """Return the name of the C struct type for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef of the struct, unless it is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member in self.fields:
            output('    int ', member, ';\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
316
317
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Identity and origin of the definition.
        self.name, self.file, self.lineno = name, input_file, lineno
        # What the definition builds on: parse_generic passes the
        # Arguments for a Format and the Format for a Pattern.
        self.base = base
        # Bit-level description of the definition.
        self.fixedbits, self.fixedmask = fixb, fixm
        self.undefmask, self.fieldmask = udfm, fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        return ' '.join((self.name,
                         str_match_bits(self.fixedbits, self.fixedmask)))

    def str1(self, i):
        """Return str(self) indented by I spaces."""
        return str_indent(i) + str(self)
# end General
338
339
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the C function extracting this format."""
        return ''.join((decode_function, '_extract_', self.name))

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        header = ('static void ', self.extract_name(),
                  '(DisasContext *ctx, ', self.base.struct_name(),
                  ' *a, ', insntype, ' insn)\n{\n')
        output(*header)
        # One assignment per field of the format.
        for member, fld in self.fields.items():
            output('    a->', member, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
354
355
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and translator prototype in C."""
        struct = self.base.base.struct_name()
        output('typedef ', struct, ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that decodes and translates this pattern.

        I is the indentation.  When EXTRACTED is false the extract
        function of the format is called first.  OUTERBITS and
        OUTERMASK are accepted for interface compatibility and unused.
        """
        pad = str_indent(i)
        argname = self.base.base.name
        output(pad, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(pad, self.base.extract_name(),
                   '(ctx, &u.f_', argname, ', insn);\n')
        # Fields set by the pattern itself are assigned after extraction.
        for member, fld in self.fields.items():
            output(pad, 'u.f_', argname, '.', member, ' = ',
                   fld.str_extract(), ';\n')
        output(pad, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', argname, ')) return true;\n')

    # Normal patterns do not have children, so the tree-walking
    # passes have nothing to do here.
    def build_tree(self):
        pass

    def prop_masks(self):
        pass

    def prop_format(self):
        pass

    def prop_width(self):
        pass

# end Pattern
391
392
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        # General.__init__ is not used: a group has no name, field mask
        # or fields of its own.
        self.file = input_file
        self.lineno = lineno
        # Child patterns and nested groups, in source order.
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        """Emit the declarations of every child."""
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute fixedbits/fixedmask/undefmask common to all children."""
        global insnmask

        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Shrink fixedmask until every child has the same fixedbits
        # under it: each pass drops the bits on which the first
        # disagreeing child differs, then starts over.
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                repeat = False

        # If the loop stopped because the mask became empty, fixedbits
        # still holds bits taken under the previous, wider mask.
        if fixedmask == 0:
            fixedbits = 0

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        """Build the decode trees of every child."""
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        """Propagate formats within every child."""
        # This used to call p.build_tree(), so formats were never
        # propagated into nested groups.
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Set self.width to the width shared by all children.

        Children of differing width are reported as an error.
        """
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern


class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit C code trying each child in order.

        A child whose fixedmask differs from OUTERMASK is guarded by an
        'if' testing only the bits not already covered by OUTERMASK.
        """
        ind = str_indent(i)
        for p in self.pats:
            if outermask != p.fixedmask:
                innermask = p.fixedmask & ~outermask
                innerbits = p.fixedbits & ~outermask
                output(ind, 'if ((insn & ',
                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
                       ') {\n')
                output(ind, '    /* ',
                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                output(ind, '}\n')
            else:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
#end IncMultiPattern


class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # FM: all bits decided at or above this node.
        # TM: the bits this node itself switches on.
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs, one per value of the TM bits.
        self.subs = []
        # Format shared by everything below, if any (see
        # ExcMultiPattern.prop_format).
        self.base = None

    def str1(self, i):
        """Return a multi-line dump of the tree indented by I spaces."""
        ind = str_indent(i)
        r = '{0}{1:08x}'.format(ind, self.fixedmask)
        # The shared format is stored in self.base; the 'format'
        # attribute read here previously is never set on a Tree.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += '{0}  {1:08x}:\n'.format(ind, b)
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over thismask with one case per child."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        # A run that already starts at bit 0 (sh == 0) needs no shift
        # and takes the plain masking branch.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)

            def str_case(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            def str_switch(b):
                return 'insn & 0x{0:08x}'.format(b)

            def str_case(b):
                return '0x{0:08x}'.format(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, '    /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, '    return false;\n')
        output(ind, '}\n')
# end Tree


class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree distinguishing PATS by bits outside OUTERMASK."""
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for i in pats:
            innermask &= i.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            # Several patterns and no bit left to tell them apart.
            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for i in pats:
            fb = i.fixedbits & innermask
            if fb in bins:
                bins[fb].append(i)
            else:
                bins[fb] = [i]

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        """Build the trees of all children, then this group's own tree."""
        # This used to call super().prop_format(), which only built the
        # children's trees because of the matching mistake in
        # MultiPattern.prop_format.
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        """Propagate formats in all children, then in this group's tree."""
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
636
637
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO

    Each token is 'pos:len' (unsigned), 'pos:slen' (signed) or
    '!function=name'.  The resulting field object is stored in the
    global fields table under NAME.
    """
    global fields
    global insnwidth

    # A "simple" field will have only one component;
    # a "multifield" will have several.
    parts = []
    total_len = 0
    func = None
    for tok in toks:
        if re.match('^!function=', tok):
            if func:
                error(lineno, 'duplicate function')
            func = tok.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', tok):
            # Signed field extract
            signed = True
            numbers = tok.split(':s')
        elif re.fullmatch('[0-9]+:[0-9]+', tok):
            # Unsigned field extract
            signed = False
            numbers = tok.split(':')
        else:
            error(lineno, 'invalid field token "{0}"'.format(tok))
        pos = int(numbers[0])
        length = int(numbers[1])
        if pos + length > insnwidth:
            error(lineno, 'field {0} too large'.format(tok))
        parts.append(Field(signed, pos, length))
        total_len += length

    if total_len > insnwidth:
        error(lineno, 'field too large')

    if len(parts) == 0:
        # No bit ranges at all: only valid as a pure function call.
        if func:
            result = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(parts) == 1:
            result = parts[0]
        else:
            covered = 0
            for part in parts:
                if covered & part.mask:
                    error(lineno, 'field components overlap')
                covered |= part.mask
            result = MultiField(parts, covered)
        if func:
            result = FunctionField(func, result)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = result
# end parse_field
698
699
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO

    Each token is either a C identifier naming a member or '!extern'.
    The resulting Arguments object is stored in the global arguments
    table under NAME.
    """
    global arguments
    global re_C_ident
    global anyextern

    members = []
    is_extern = False
    for tok in toks:
        if tok == '!extern':
            is_extern = True
            anyextern = True
            continue
        if not re.fullmatch(re_C_ident, tok):
            error(lineno, 'invalid argument set token "{0}"'.format(tok))
        if tok in members:
            error(lineno, 'duplicate argument "{0}"'.format(tok))
        members.append(tok)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, is_extern)
# end parse_arguments
723
724
def lookup_field(lineno, name):
    """Return the field registered as NAME in the global fields table.

    An unknown NAME is reported as an error at LINENO.
    """
    global fields
    if name in fields:
        return fields[name]
    error(lineno, 'undefined field', name)
730
731
def add_field(lineno, flds, new_name, f):
    """Store field F in FLDS under NEW_NAME and return FLDS.

    FLDS is modified in place.  A NEW_NAME that is already present is
    reported as an error at LINENO.
    """
    already_present = new_name in flds
    if already_present:
        error(lineno, 'duplicate field', new_name)
    flds.update({new_name: f})
    return flds
737
738
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the global field OLD_NAME to FLDS as NEW_NAME and return FLDS."""
    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
741
742
def infer_argument_set(flds):
    """Return an argument set whose members are the names in FLDS.

    An existing argument set with exactly these names is reused;
    otherwise a new, non-extern one is created and registered under a
    name derived from decode_function and the number of sets so far.
    """
    global arguments
    global decode_function

    existing = next((candidate for candidate in arguments.values()
                     if eq_fields_for_args(flds, candidate.fields)), None)
    if existing is not None:
        return existing

    new_name = decode_function + str(len(arguments))
    created = Arguments(new_name, flds.keys(), False)
    arguments[new_name] = created
    return created
755
756
def infer_format(arg, fieldmask, flds, width):
    """Return a (format, constant-fields) pair for a format-less pattern.

    An existing format is reused when it has the same argument set (if
    ARG is given), FIELDMASK, WIDTH and fields; otherwise a new one is
    created and registered in the global formats table.
    """
    global arguments
    global formats
    global decode_function

    const_flds = {}
    var_flds = {}
    for fname, fld in flds.items():
        # NOTE(review): 'fld is ConstField' compares a field object
        # with the class itself and is therefore never true, so
        # const_flds stays empty and constants end up in var_flds.
        # isinstance() was presumably intended, but changing it alters
        # which formats are shared and generated -- confirm first.
        bucket = const_flds if fld is ConstField else var_flds
        bucket[fname] = fld

    # Look for an existing format with the same argument set and fields
    for candidate in formats.values():
        if arg and candidate.base != arg:
            continue
        if (fieldmask == candidate.fieldmask
                and width == candidate.width
                and eq_fields_for_fmts(flds, candidate.fields)):
            return (candidate, const_flds)

    # Nothing suitable: create a format, inferring the argument set
    # from the field names when none was given.
    new_name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    created = Format(new_name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[new_name] = created

    return (created, const_flds)
# end infer_format
791
792
def parse_generic(lineno, parent_pat, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    When PARENT_PAT is None the definition is a format, which is
    registered in the global formats table under NAME.  Otherwise it
    is a pattern, which is appended to PARENT_PAT.pats and to the
    global allpatterns list.  Any inconsistency is reported through
    error().
    """
    global fields
    global arguments
    global formats
    global allpatterns
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_C_ident
    global insnwidth
    global insnmask
    global variablewidth

    is_format = parent_pat is None

    # The masks are accumulated left to right: every bit-consuming
    # token shifts them up by its width, and WIDTH counts the bits
    # consumed so far.
    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, t):
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if re.fullmatch(re_fld_ident, t):
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re.fullmatch('[01.-]+', t):
            shift = len(t)
            # fms: 1 wherever the token has a literal 0 or 1.
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            # fbs: 1 wherever the token has a literal 1.
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            # ubm: 1 wherever the token has a dash.
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
            # The field sits directly below the bits consumed so far.
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, 'invalid token "{0}"'.format(t))
        width += shift

    # A short definition of whole bytes is moved to the top of the
    # instruction word; the unused low bits are marked undefined.
    if variablewidth and width < insnwidth and width % 8 == 0:
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1

    # We should have filled in all of the bits of the instruction.
    # (A format that consumes no bits at all is exempt.)
    elif not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, 'field {0} not in argument set {1}'
                                  .format(f, arg.name))
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            # The pattern takes over everything the format specifies.
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            # From here on FLDS holds only what infer_format returns as
            # the pattern's own fields.
            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, 'field {0} not in argument set {1}'
                              .format(f, arg.name))
            if f in fmt.fields.keys():
                error(lineno, 'field {0} set by format and pattern'.format(f))
        # Every member of the argument set needs a value from either
        # the pattern or the format.
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, 'field {0} not initialized'.format(f))
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, fixedmask))
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, undefmask))
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fixedmask, undefmask))
    # A pattern, unlike a format, must account for every instruction bit.
    if not is_format:
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified (0x{0:08x})'
                          .format(allbits ^ insnmask))
# end parse_generic
978
979
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file.

    Arguments:
      f          -- iterable yielding the text lines of the input
      parent_pat -- multi-pattern to which top-level patterns and
                    nested groups are appended

    Each logical line (after joining backslash continuations and
    dropping comments) is dispatched on its first token:
      '{' / '['   open a nested IncMultiPattern / ExcMultiPattern
      '}' / ']'   close the current nested group
      %name       parse_field
      &name       parse_arguments
      @name       parse_generic, as a format (no parent pattern)
      name        parse_generic, as a pattern under the current parent
    Every nesting level must be indented two columns further than the
    enclosing one.  All problems are reported through error().
    """

    # Read all of the lines of the file.  Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if toks:
            # Next line after continuation; the indent and starting
            # line number of the first physical line stay in force.
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if not t:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks.pop(0)

        # End nesting?
        if name in ('}', ']'):
            if toks:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack means "extra close brace"; catching
            # anything broader would also hide unrelated failures.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name in ('{', '['):
            if toks:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            # Remember the enclosing group so the matching close
            # brace can restore it.
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, 'invalid token "{0}"'.format(name))
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1093
1094
class SizeTree:
    """Class representing a node in a size decode tree"""

    def __init__(self, m, w):
        # mask: instruction bits this node switches on.
        # subs: list of (bits, subtree) pairs, one per switch case.
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        """Return a multi-line dump of this subtree, indented by I."""
        pad = str_indent(i)
        pieces = ['{0}{1:08x}'.format(pad, self.mask), ' [\n']
        for (bits, sub) in self.subs:
            pieces.append('{0}  {1:08x}:\n'.format(pad, bits))
            pieces.append(sub.str1(i + 4) + '\n')
        pieces.append(pad + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement for this node at indent I.

        EXTRACTED is the number of insn bits already loaded; OUTERBITS
        and OUTERMASK describe the bits matched by enclosing nodes and
        are used for the comment emitted under each case label.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            output(pad, 'insn = ', decode_function,
                   '_load_bytes(ctx, insn, {0}, {1});\n'
                   .format(extracted // 8, self.width // 8))
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        shift = is_contiguous(self.mask)
        if shift > 0:
            switch_expr = '(insn >> {0}) & 0x{1:x}'.format(shift,
                                                          self.mask >> shift)

            def case_label(bits):
                return '0x{0:x}'.format(bits >> shift)
        else:
            switch_expr = 'insn & 0x{0:08x}'.format(self.mask)

            def case_label(bits):
                return '0x{0:08x}'.format(bits)

        # The mask seen by every case is the same; only the bits differ.
        innermask = outermask | self.mask

        output(pad, 'switch (', switch_expr, ') {\n')
        for bits, sub in sorted(self.subs):
            innerbits = outerbits | bits
            output(pad, 'case ', case_label(bits), ':\n')
            output(pad, '    /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            sub.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1155
class SizeLeaf:
    """Class representing a leaf node in a size decode tree"""

    def __init__(self, m, w):
        self.width = w
        self.mask = m

    def str1(self, i):
        """Return the mask as 8 hex digits, indented by I."""
        return '{0}{1:08x}'.format(str_indent(i), self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code for this leaf at indent I.

        EXTRACTED is the number of insn bits already loaded.  OUTERBITS
        and OUTERMASK are accepted for signature parity with SizeTree
        and are not used here.
        """
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            first_byte = extracted // 8
            last_byte = self.width // 8
            output(pad, 'insn = ', decode_function,
                   '_load_bytes(ctx, insn, {0}, {1});\n'
                   .format(first_byte, last_byte))
        output(pad, 'return insn;\n')
# end SizeLeaf
1182
1183
def build_size_tree(pats, width, outerbits, outermask):
    """Build the SizeTree/SizeLeaf structure that selects the insn width.

    PATS are the candidate patterns, WIDTH is the number of insn bits
    considered so far, and OUTERBITS/OUTERMASK are the fixed bits
    already matched by enclosing nodes.  Returns a SizeLeaf when all
    patterns share one width, otherwise a SizeTree (or the result of
    recursing with eight more bits).
    """
    # Collect the mask of bits that are fixed in this width
    innermask = 0xff << (insnwidth - width)
    innermask &= ~outermask

    # Track the largest pattern width seen and whether all widths agree.
    widest = None
    onewidth = True
    for pat in pats:
        innermask &= pat.fixedmask
        if widest is None:
            widest = pat.width
        elif pat.width != widest:
            onewidth = False
            widest = max(widest, pat.width)

    if onewidth:
        return SizeLeaf(innermask, widest)

    if innermask == 0:
        # No common fixed bits in this byte; look at the next byte if
        # any pattern is wide enough to have one.
        if width < widest:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        'overlapping patterns size {0}:'.format(width), pnames)

    # Group the patterns by the value of their fixed bits under innermask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # Every pattern agrees on these bits: nothing to switch on here.
        (bits, group), = bins.items()
        return build_size_tree(group, width + 8, bits | outerbits, fullmask)

    node = SizeTree(innermask, width)
    for bits, group in bins.items():
        sub = build_size_tree(group, width, bits | outerbits, fullmask)
        node.subs.append((bits, sub))
    return node
# end build_size_tree
1234
1235
def prop_size(tree):
    """Propagate minimum widths up the decode size tree

    For a SizeTree, store the smallest width found among its subtrees
    into tree.width and return it; for any other node return its width
    unchanged.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for (_, sub) in tree.subs:
        sub_width = prop_size(sub)
        if narrowest is None or sub_width < narrowest:
            narrowest = sub_width
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1251
1252
def main():
    """Command-line entry point: parse the inputs and emit the C decoder.

    Options (from sys.argv):
      -o FILE, --output=FILE   write to FILE instead of stdout
      --decode=NAME            name of the decode function, non-static
      --static-decode=NAME     name of the decode function, kept static
      --translate=PREFIX       prefix of translate functions, non-static
      -w N, --insnwidth=N      instruction width in bits (16 or 32)
      --varinsnwidth=N         as --insnwidth, and also emit the
                               variable-width <decode>_load function
    The remaining arguments are the input files; at least one is required.
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global variablewidth
    global anyextern

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=']
    # NOTE(review): 'v' is accepted by the short-option string but has no
    # branch below, so passing -v reaches the final assertion.
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            # Report a non-numeric width like any other usage error
            # instead of letting ValueError escape as a traceback.
            try:
                insnwidth = int(a)
            except ValueError:
                error(0, 'invalid insn width', a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        # input_file is global so that error reporting can name the file.
        input_file = filename
        # The context manager closes the file even if parsing raises.
        with open(filename, 'r') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'w')
    else:
        output_fd = sys.stdout

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if len(allpatterns) != 0:
        # One union member per argument set, in sorted name order.
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        # stree was built above under the same variablewidth condition.
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               '    ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    # Only close what we opened; sys.stdout is left alone.
    if output_file:
        output_fd.close()
# end main
1394
1395
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
1398