1##############################################################################
2#
3# Copyright (c) 2003 Zope Foundation and Contributors.
4#
5# This software is subject to the provisions of the Zope Public License,
6# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
7# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
8# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
9# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
10# FOR A PARTICULAR PURPOSE
11#
12##############################################################################
13"""Verify simple properties of bytecode.
14
15Some of the transformations performed by the RestrictionMutator are
16tricky.  This module checks the generated bytecode as a way to verify
17the correctness of the transformations.  Violations of some
18restrictions are obvious from inspection of the bytecode.  For
19example, the bytecode should never contain a LOAD_ATTR call, because
20all attribute access is performed via the _getattr_() checker
21function.
22"""
23
24import dis
25import types
26
def verify(code):
    """Run the bytecode checks on code and on every nested code object.

    The code object itself is checked first.  Each code object found
    among its constants (co_consts) is then verified the same way, so
    the whole tree of nested code objects is covered.  Whatever
    exception verifycode() raises propagates to the caller.
    """
    verifycode(code)
    nested = [const for const in code.co_consts
              if isinstance(const, types.CodeType)]
    for inner in nested:
        verify(inner)
37
def verifycode(code):
    """Check a single code object, dumping its disassembly on failure.

    Delegates to _verifycode().  If that raises, the disassembly of
    code is printed with dis.dis() to help diagnose the problem and the
    original exception is re-raised unchanged.
    """
    try:
        _verifycode(code)
    except Exception:
        # Only dump the bytecode for real verification errors; do not
        # intercept KeyboardInterrupt or SystemExit.
        dis.dis(code)
        raise
44
def _is_op(op, opname, arg=None):
    """Return True if op is an Op named opname (and, if given, with arg).

    op may be None (an unfilled slot of the opcode window), in which
    case the result is False.
    """
    if op is None or op.opname != opname:
        return False
    return arg is None or op.arg == arg


def _verifycode(code):
    """Scan the opcodes of one code object for unguarded operations.

    The opcodes produced by disassemble(code) are inspected in order,
    together with a window of the three preceding opcodes.  A
    ValueError is raised when:

    - a LOAD_ATTR is found that is neither the list-comprehension
      ``append`` lookup nor an ``__enter__``/``__exit__`` lookup that is
      later followed by WITH_CLEANUP;
    - a STORE_ATTR or DEL_ATTR is not preceded by a call to the
      ``_write_`` global;
    - an UNPACK* opcode is not preceded by a call to the ``_getiter_``
      global.

    The line number used in the error messages is the most recent
    op.line seen, starting from code.co_firstlineno.
    """
    line = code.co_firstlineno
    # keep a window of the last three opcodes, with the most recent first;
    # slots are None until enough opcodes have been seen
    window = (None, None, None)
    # (op that loaded __exit__, op that loaded __enter__), pending until
    # a WITH_CLEANUP confirms they belong to a 'with' statement
    with_context = (None, None)

    for op in disassemble(code):
        if op.line is not None:
            line = op.line
        if op.opname.endswith("LOAD_ATTR"):
            # All the user code that generates LOAD_ATTR should be
            # rewritten, but the code generated for a list comp
            # includes a LOAD_ATTR to extract the append method.
            # Another exception is the new-in-Python 2.6 'context
            # managers', which do a LOAD_ATTR for __exit__ and
            # __enter__.
            if op.arg == "__exit__":
                with_context = (op, with_context[1])
            elif op.arg == "__enter__":
                with_context = (with_context[0], op)
            elif not (op.arg == "append" and
                      _is_op(window[0], "DUP_TOP") and
                      _is_op(window[1], "BUILD_LIST")):
                raise ValueError("direct attribute access %s: %s, %s:%d"
                                 % (op.opname, op.arg, code.co_filename, line))
        if op.opname == "WITH_CLEANUP":
            # Here we check if the LOAD_ATTR for __exit__ and
            # __enter__ were part of a 'with' statement by checking
            # for the 'WITH_CLEANUP' bytecode. If one is seen, we
            # clear the with_context variable and let it go. The
            # access was safe.
            with_context = (None, None)
        if op.opname in ("STORE_ATTR", "DEL_ATTR"):
            # check that arg is appropriately wrapped
            if not (_is_op(window[0], "CALL_FUNCTION") and
                    _is_op(window[2], "LOAD_GLOBAL", "_write_")):
                # Dump the window as a debugging aid.  Slots may still be
                # None near the start of the code object.
                for i, prev in enumerate(window):
                    print("%d %s %s" % (i,
                                        getattr(prev, "opname", None),
                                        getattr(prev, "arg", None)))
                raise ValueError("unguard attribute set/del at %s:%d"
                                 % (code.co_filename, line))
        if op.opname.startswith("UNPACK"):
            # An UNPACK opcode extracts items from iterables, and that's
            # unsafe.  The restricted compiler doesn't remove UNPACK opcodes,
            # but rather *inserts* a call to _getiter_() before each, and
            # that's the pattern we need to see.
            if not (_is_op(window[0], "CALL_FUNCTION") and
                    _is_op(window[1], "ROT_TWO") and
                    _is_op(window[2], "LOAD_GLOBAL", "_getiter_")):
                raise ValueError("unguarded unpack sequence at %s:%d" %
                                 (code.co_filename, line))

        # should check CALL_FUNCTION_{VAR,KW,VAR_KW} but that would
        # require a potentially unlimited history.  need to refactor
        # the "window" before I can do that.

        # NOTE(review): "LOAD_SUBSCR" does not look like a CPython opcode
        # name (subscription is normally BINARY_SUBSCR), so this check
        # may never fire -- confirm the intent before changing it.
        if op.opname == "LOAD_SUBSCR":
            raise ValueError("unguarded index of sequence at %s:%d" %
                             (code.co_filename, line))

        window = (op,) + window[:2]

    # An access to __enter__ or __exit__ that was never followed by a
    # WITH_CLEANUP was not part of a 'with' statement. This is not
    # allowed; report the first such access.
    for pending in with_context:
        if pending is not None:
            if pending.line is not None:
                line = pending.line
            raise ValueError("direct attribute access %s: %s, %s:%d"
                             % (pending.opname, pending.arg,
                                code.co_filename, line))
120
class Op(object):
    """A single decoded bytecode instruction.

    Instances are created with the numeric opcode and its offset in the
    bytecode.  All other attributes start out as None (False for
    target), so every slot is always readable.
    """

    __slots__ = (
        "opname",  # string, name of the opcode
        "argcode", # int, the number of the argument, or None
        "arg",     # any, the object, name, or value of argcode
        "line",    # int, line number or None
        "target",  # boolean, is this op the target of a jump
        "pos",     # int, offset in the bytecode
        )

    def __init__(self, opcode, pos):
        self.opname = dis.opname[opcode]
        # Initialise argcode too, so that reading it on an opcode without
        # an argument yields None instead of raising AttributeError.
        self.argcode = None
        self.arg = None
        self.line = None
        self.target = False
        self.pos = pos
137
def disassemble(co, lasti=-1):
    """Yield an Op for each instruction in the code object co.

    co.co_code is decoded as a sequence of one-byte opcodes, each
    followed by a two-byte little-endian argument when the opcode is at
    least dis.HAVE_ARGUMENT.  For every instruction an Op is yielded
    with:

    - pos:     offset of the opcode byte in co.co_code
    - line:    source line if a line starts at this offset, else None
    - target:  True if dis.findlabels() reports this offset as a jump
               target
    - argcode: the raw numeric argument (set only for opcodes that take
               one)
    - arg:     the constant, name, local, free variable, comparison
               operator or relative jump destination that argcode
               refers to, where applicable

    lasti is not used by this function.
    """
    code = co.co_code
    labels = dis.findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = co.co_cellvars + co.co_freevars
    while i < n:
        pos = i
        op = ord(code[pos])
        o = Op(op, pos)
        i += 1
        # Line starts and jump targets are keyed by the offset of the
        # opcode byte itself, so look them up with pos, not with the
        # already advanced i.
        if pos in linestarts:
            o.line = linestarts[pos]
        if pos in labels:
            o.target = True
        # HAVE_ARGUMENT is itself the first opcode that takes an
        # argument, hence >= rather than >.
        if op >= dis.HAVE_ARGUMENT:
            arg = ord(code[i]) + ord(code[i+1]) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # carries the high 16 bits of the next opcode's argument
                extended_arg = arg << 16
            o.argcode = arg
            if op in dis.hasconst:
                o.arg = co.co_consts[arg]
            elif op in dis.hasname:
                o.arg = co.co_names[arg]
            elif op in dis.hasjrel:
                # relative to the offset of the following instruction
                o.arg = i + arg
            elif op in dis.haslocal:
                o.arg = co.co_varnames[arg]
            elif op in dis.hascompare:
                o.arg = dis.cmp_op[arg]
            elif op in dis.hasfree:
                o.arg = free[arg]
        yield o
174
175# findlinestarts is copied from Python 2.4's dis module.  The code
176# didn't exist in 2.3, but it would be painful to code disassemble()
177# without it.
def findlinestarts(code):
    """Yield (offset, lineno) for each bytecode offset that starts a line.

    code.co_lnotab is read as consecutive (byte increment, line
    increment) character pairs, starting from offset 0 and
    code.co_firstlineno.  A pair is reported only when the bytecode
    offset is about to advance and the line differs from the last one
    reported; the final position is reported after the table is
    exhausted if its line has not been reported yet.
    """
    lnotab = code.co_lnotab
    reported_line = None
    current_line = code.co_firstlineno
    offset = 0
    for addr_char, line_char in zip(lnotab[0::2], lnotab[1::2]):
        addr_step = ord(addr_char)
        if addr_step:
            if current_line != reported_line:
                yield (offset, current_line)
                reported_line = current_line
            offset += addr_step
        current_line += ord(line_char)
    if current_line != reported_line:
        yield (offset, current_line)
199