1# -*- test-case-name: twisted.test.test_persisted -*-
2
3# Copyright (c) Twisted Matrix Laboratories.
4# See LICENSE for details.
5
6"""
7AOT: Abstract Object Trees
8The source-code-marshallin'est abstract-object-serializin'est persister
9this side of Marmalade!
10"""
11
12
13import copyreg as copy_reg
14import re
15import types
16from tokenize import generate_tokens as tokenize
17
18from twisted.persisted import crefutil
19from twisted.python import log, reflect
20from twisted.python.compat import _constructMethod
21
22###########################
23# Abstract Object Classes #
24###########################
25
26# "\0" in a getSource means "insert variable-width indention here".
27# see `indentify'.
28
29
class Named:
    """
    Base for abstract objects that are described by nothing but a name.

    @ivar name: the value handed to the constructor, stored unchanged.
    """

    def __init__(self, name):
        # No validation happens here; the name is kept exactly as given.
        setattr(self, "name", name)
33
34
class Class(Named):
    """
    Abstract object standing in for a class, identified by its name.
    """

    def getSource(self):
        """
        Return the C{Class(...)} expression that rebuilds this node.
        """
        template = "Class(%r)"
        return template % self.name
38
39
class Function(Named):
    """
    Abstract object standing in for a function, identified by its name.
    """

    def getSource(self):
        """
        Return the C{Function(...)} expression that rebuilds this node.
        """
        template = "Function(%r)"
        return template % self.name
43
44
class Module(Named):
    """
    Abstract object standing in for a module, identified by its name.
    """

    def getSource(self):
        """
        Return the C{Module(...)} expression that rebuilds this node.
        """
        template = "Module(%r)"
        return template % self.name
48
49
class InstanceMethod:
    """
    Abstract object describing a method bound to an instance.

    @ivar name: the method's name.
    @ivar klass: the name of the class the method is looked up on.
    @ivar instance: the abstract form of the object the method is bound to.
    """

    def __init__(self, name, klass, inst):
        """
        @raise TypeError: if C{inst} is not an L{Instance}, L{Ref} or
            L{Deref}.
        """
        # Only abstract instance-like nodes may carry a bound method.
        if not isinstance(inst, (Ref, Instance, Deref)):
            raise TypeError("%s isn't an Instance, Ref, or Deref!" % inst)
        self.name = name
        self.klass = klass
        self.instance = inst

    def getSource(self):
        """
        Return the C{InstanceMethod(...)} expression for this node; the
        instance goes on its own line behind an indentation marker.
        """
        instanceSource = prettify(self.instance)
        template = "InstanceMethod({!r}, {!r}, \n\0{})"
        return template.format(self.name, self.klass, instanceSource)
68
69
class _NoStateObj:
    """
    Type of the L{NoStateObj} sentinel; it carries no behaviour of its own.
    """


# Unique marker meaning "no explicit state object was supplied".  It is
# compared by identity, so None stays usable as a real state value.
NoStateObj = _NoStateObj()
75
# Exact types whose values need no abstract-object wrapper: prettify() emits
# them with repr(), and both the jellier and the unjellier hand them through
# unchanged.  Membership is tested against type(obj), so instances of
# subclasses of these types do not qualify.
_SIMPLE_BUILTINS = [
    bool,
    bytes,
    str,
    int,
    float,
    complex,
    type(None),
    slice,
    type(Ellipsis),
]
87
88
class Instance:
    """
    Abstract object describing an instance of a named class plus its state.

    @ivar klass: the class name (always a C{str}).
    @ivar state: either the keyword arguments given to the constructor or
        the explicit state object.
    @ivar stateIsDict: C{1} when C{state} came from keyword arguments, C{0}
        when an explicit state object was passed.
    """

    def __init__(self, className, __stateObj__=NoStateObj, **state):
        """
        @param className: name of the class this instance belongs to.
        @param __stateObj__: optional explicit state object; when omitted the
            keyword arguments form the state.
        @raise TypeError: if C{className} is not a string.
        """
        if not isinstance(className, str):
            raise TypeError("%s isn't a string!" % className)
        self.klass = className
        if __stateObj__ is NoStateObj:
            # No explicit state: the keyword arguments are the state.
            self.state = state
            self.stateIsDict = 1
        else:
            self.state = __stateObj__
            self.stateIsDict = 0

    def getSource(self):
        """
        Return the C{Instance(...)} expression for this node, preferring the
        keyword-argument form whenever the state dictionary allows it.
        """
        # XXX make state be foo=bar instead of a dict.
        state = self.state
        kwCandidate = None
        if self.stateIsDict:
            kwCandidate = state
        elif isinstance(state, Ref) and isinstance(state.obj, dict):
            # A Ref around a plain dict is unwrapped and treated like a dict.
            kwCandidate = state.obj

        if kwCandidate is None:
            return f"Instance({self.klass!r}, {prettify(state)})"

        try:
            return f"Instance({self.klass!r}, {dictToKW(kwCandidate)})"
        except NonFormattableDict:
            # Keys that cannot be keyword names: emit a dict literal instead.
            return f"Instance({self.klass!r}, {prettify(kwCandidate)})"
115
116
class Ref:
    """
    Wrapper around an abstract object that may be referenced from elsewhere
    in the tree.

    @ivar refnum: the reference number, or C{None} while nothing refers to
        the wrapped object.
    @ivar obj: the wrapped abstract object, or C{None} while it has not been
        set yet.
    """

    def __init__(self, *args):
        """
        Accept either no arguments (an empty reference to be filled in later
        with L{setRef} and L{setObj}) or exactly C{(refnum, obj)}.

        @raise TypeError: for any other number of arguments.  Previously such
            a call silently produced an object with neither attribute set,
            which only failed later with an C{AttributeError}.
        """
        # blargh, lame.
        if len(args) == 2:
            self.refnum, self.obj = args
        elif not args:
            self.refnum = None
            self.obj = None
        else:
            raise TypeError(
                "Ref() takes either no arguments or (refnum, obj), "
                "got %d arguments" % len(args)
            )

    def setRef(self, num):
        """
        Assign the reference number.

        @raise ValueError: if a (truthy) reference number is already set.
        """
        # Truthiness is deliberate: getSource() below also treats a falsy
        # refnum as "not referenced".
        if self.refnum:
            raise ValueError(f"Error setting id {num}, I already have {self.refnum}")
        self.refnum = num

    def setObj(self, obj):
        """
        Assign the wrapped object.

        @raise ValueError: if an object has already been set.
        """
        # Compare against None rather than using truthiness: an empty list,
        # tuple or dict is a perfectly valid wrapped object and must not be
        # silently overwritten.  getSource() uses the same "is None" test.
        if self.obj is not None:
            raise ValueError(f"Error setting obj {obj}, I already have {self.obj}")
        self.obj = obj

    def getSource(self):
        """
        Return the source for the wrapped object, surrounded by C{Ref(...)}
        only when a reference number has been assigned.

        @raise RuntimeError: if no object has been set yet.
        """
        if self.obj is None:
            raise RuntimeError(
                "Don't try to display me before setting an object on me!"
            )
        if self.refnum:
            return "Ref(%d, \n\0%s)" % (self.refnum, prettify(self.obj))
        return prettify(self.obj)
145
146
class Deref:
    """
    Abstract object pointing back at the L{Ref} carrying the same number.

    @ivar refnum: the number of the reference being pointed at.
    """

    def __init__(self, num):
        setattr(self, "refnum", num)

    def getSource(self):
        """
        Return the C{Deref(...)} expression for this node.
        """
        template = "Deref(%d)"
        return template % self.refnum

    # The repr is the very same function as the source form.
    __repr__ = getSource
155
156
class Copyreg:
    """
    Abstract object pairing the name of a loading function with the state
    to hand to it.

    @ivar loadfunc: what identifies the loading function (stored unchanged).
    @ivar state: the abstract form of the state.
    """

    def __init__(self, loadfunc, state):
        self.loadfunc, self.state = loadfunc, state

    def getSource(self):
        """
        Return the C{Copyreg(...)} expression for this node.
        """
        return "Copyreg({}, {})".format(repr(self.loadfunc), prettify(self.state))
164
165
166###############
167# Marshalling #
168###############
169
170
def getSource(ao):
    """
    Render an abstract object as source text that assigns it to C{app}.

    @param ao: the abstract object (tree) to render.
    @return: the output of L{indentify} for C{"app = "} followed by the
        prettified tree.
    """
    rendered = prettify(ao)
    assignment = "app = " + rendered
    return indentify(assignment)
174
175
class NonFormattableDict(Exception):
    """A dictionary was not formattable."""


# Shape of an acceptable keyword name.  dictToKW() applies it with
# fullmatch(), so "$" cannot accept a key that ends in a newline.
r = re.compile("[a-zA-Z_][a-zA-Z0-9_]*$")


def dictToKW(d):
    """
    Format a dictionary as the keyword-argument part of a call, one
    C{key=value,} entry per line, sorted by key.

    @param d: the dictionary to format.
    @return: the formatted text; the empty string for an empty dictionary.
    @raise NonFormattableDict: if any key cannot be written as a keyword
        argument of C{Instance(...)}: it is not a string, is not an ASCII
        identifier, is a Python keyword, or collides with one of the
        parameter names of C{Instance.__init__}.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import keyword

    # Passing any of these as a keyword to Instance(...) raises
    # "got multiple values for argument" when the source is read back.
    reserved = ("self", "className", "__stateObj__")

    # Validate every key *before* sorting.  Sorting first would raise
    # TypeError for keys of mixed types (say 1 and "a"), and callers only
    # catch NonFormattableDict to fall back to a dict literal.
    keys = list(d)
    for k in keys:
        if not isinstance(k, str):
            # (k,) keeps tuple keys from being taken as several % arguments.
            raise NonFormattableDict("%r ain't a string" % (k,))
        if not r.fullmatch(k) or keyword.iskeyword(k) or k in reserved:
            raise NonFormattableDict("%r ain't an identifier" % (k,))

    keys.sort()
    return "".join(f"\n\0{k}={prettify(d[k])}," for k in keys)
194
195
def prettify(obj):
    """
    Return the (not yet indented) source text for C{obj}.

    Anything with a C{getSource} attribute renders itself.  Otherwise the
    exact type decides: simple builtins use C{repr}, and dicts, lists and
    tuples are written one element per line, each line starting with the
    indentation marker.

    @raise TypeError: if the type of C{obj} is none of the above.
    """
    if hasattr(obj, "getSource"):
        return obj.getSource()

    # basic type
    kind = type(obj)
    if kind in _SIMPLE_BUILTINS:
        return repr(obj)

    if kind is dict:
        entries = [f"\n\0{prettify(k)}: {prettify(v)}," for k, v in obj.items()]
        # An empty dict closes on the same line as it opens.
        closing = "\n\0}" if obj else "}"
        return "".join(["{", *entries, closing])

    brackets = {list: ("[", "]"), tuple: ("(", ")")}
    if kind in brackets:
        opener, closer = brackets[kind]
        elements = ["\n\0%s," % prettify(x) for x in obj]
        closing = "\n\0" + closer if obj else closer
        return "".join([opener, *elements, closing])

    raise TypeError(f"Unsupported type {kind} when trying to prettify {obj}.")
228
229
def indentify(s):
    """
    Tokenize C{s} and glue the token strings back together, replacing each
    indentation-marker token with two spaces per currently open bracket.

    @param s: text produced by L{prettify}.
    @return: the re-assembled text.
    """
    openers = ("[", "(", "{")
    closers = ("]", ")", "}")
    # list.pop serves as the readline callable: it yields s first, then "".
    pending = ["", s]
    nesting = []
    pieces = []
    for token in tokenize(pending.pop):
        text = token[1]
        if text in openers:
            nesting.append(text)
        elif text in closers:
            nesting.pop()
        # A closing bracket is counted before its own marker is expanded.
        pieces.append("  " * len(nesting) if text == "\0" else text)
    return "".join(pieces)
250
251
252###########
253# Unjelly #
254###########
255
256
def unjellyFromAOT(aot):
    """
    Turn an Abstract Object Tree back into real objects.

    @param aot: the root of the tree.
    @return: whatever a fresh L{AOTUnjellier} produces for C{aot}.
    """
    unjellier = AOTUnjellier()
    return unjellier.unjelly(aot)
262
263
def unjellyFromSource(stringOrFile):
    """
    Execute AOT source code and unjelly the tree it assigns to C{app}.

    @param stringOrFile: either the source code itself or an object with a
        C{read()} method returning it.  A plain string is always treated as
        source code; it is never opened as a filename.
    @return: the object unjellied from the C{app} variable.
    @raise ValueError: if the executed code does not define C{app}.
    """

    # The only names the source is given up front; no __builtins__ entry is
    # supplied, so the interpreter adds the real builtins on execution.
    ns = {
        "Instance": Instance,
        "InstanceMethod": InstanceMethod,
        "Class": Class,
        "Function": Function,
        "Module": Module,
        "Ref": Ref,
        "Deref": Deref,
        "Copyreg": Copyreg,
    }

    if hasattr(stringOrFile, "read"):
        source = stringOrFile.read()
    else:
        source = stringOrFile
    code = compile(source, "<source>", "exec")
    # SECURITY: this runs the source as ordinary Python with full access to
    # builtins (and therefore imports).  Only feed it trusted input.
    exec(code, ns, ns)

    if "app" not in ns:
        raise ValueError("%s needs to define an 'app', it didn't!" % stringOrFile)
    return unjellyFromAOT(ns["app"])
293
294
class AOTUnjellier:
    """I handle the unjellying of an Abstract Object Tree.
    See AOTUnjellier.unjellyAO
    """

    def __init__(self):
        # refnum -> the object unjellied for that Ref, or the placeholder
        # created by a Deref seen before its Ref.
        self.references = {}
        # Every abstract object visited so far.  Entries are never removed;
        # the list is only dumped to the log when unjelly() fails.
        self.stack = []
        # (callable, [argument]) pairs that unjelly() invokes once the whole
        # tree has been processed.
        self.afterUnjelly = []

    ##
    # unjelly helpers (copied pretty much directly from (now deleted) marmalade)
    ##
    def unjellyLater(self, node):
        """Unjelly a node, later.

        @return: a C{crefutil._Defer} into whose slot 0 the node is unjellied.
        """
        d = crefutil._Defer()
        self.unjellyInto(d, 0, node)
        return d

    def unjellyInto(self, obj, loc, ao):
        """Utility method for unjellying one object into another.
        This automates the handling of backreferences.

        @param obj: the container to store into.
        @param loc: the key or index in C{obj} to store at.
        @param ao: the abstract object to unjelly.
        @return: the unjellied object (possibly a not-yet-known placeholder).
        """
        o = self.unjellyAO(ao)
        obj[loc] = o
        if isinstance(o, crefutil.NotKnown):
            # Ask the placeholder to fill obj[loc] in once it is resolved.
            o.addDependant(obj, loc)
        return o

    def callAfter(self, callable, result):
        """Schedule C{callable} to be called with C{result} when the whole
        tree has been unjellied.

        The argument is kept in a one-element list so that a placeholder can
        be swapped for the real object before the call is made.
        """
        if isinstance(result, crefutil.NotKnown):
            listResult = [None]
            # Slot 0 is the list's only slot and the one unjelly() reads;
            # registering any other index could never be filled in.
            result.addDependant(listResult, 0)
        else:
            listResult = [result]
        self.afterUnjelly.append((callable, listResult))

    def unjellyAttribute(self, instance, attrName, ao):
        # XXX this is unused????
        """Utility method for unjellying into instances of attributes.

        Use this rather than unjellyAO unless you like surprising bugs!
        Alternatively, you can use unjellyInto on your instance's __dict__.
        """
        self.unjellyInto(instance.__dict__, attrName, ao)

    def unjellyAO(self, ao):
        """Unjelly an Abstract Object and everything it contains.
        I return the real object.

        @raise TypeError: for an unsupported abstract object, or when an
            L{InstanceMethod} names a method its class no longer defines.
        @raise ValueError: when two Refs carry the same reference number.
        """
        self.stack.append(ao)
        t = type(ao)
        if t in _SIMPLE_BUILTINS:
            return ao

        elif t is list:
            l = []
            for x in ao:
                # Reserve the slot first so unjellyInto has an index to fill.
                l.append(None)
                self.unjellyInto(l, len(l) - 1, x)
            return l

        elif t is tuple:
            l = []
            tuple_ = tuple
            for x in ao:
                l.append(None)
                if isinstance(self.unjellyInto(l, len(l) - 1, x), crefutil.NotKnown):
                    # At least one element is unresolved: build the crefutil
                    # stand-in instead of a real tuple.
                    tuple_ = crefutil._Tuple
            return tuple_(l)

        elif t is dict:
            d = {}
            for k, v in ao.items():
                # The helper receives the key in slot 0 and the value in slot 1.
                kvd = crefutil._DictKeyAndValue(d)
                self.unjellyInto(kvd, 0, k)
                self.unjellyInto(kvd, 1, v)
            return d
        else:
            # Abstract Objects
            c = ao.__class__
            if c is Module:
                return reflect.namedModule(ao.name)

            elif c in [Class, Function] or issubclass(c, type):
                return reflect.namedObject(ao.name)

            elif c is InstanceMethod:
                im_name = ao.name
                im_class = reflect.namedObject(ao.klass)
                im_self = self.unjellyAO(ao.instance)
                if im_name in im_class.__dict__:
                    if im_self is None:
                        return getattr(im_class, im_name)
                    elif isinstance(im_self, crefutil.NotKnown):
                        return crefutil._InstanceMethod(im_name, im_self, im_class)
                    else:
                        return _constructMethod(im_class, im_name, im_self)
                else:
                    raise TypeError("instance method changed")

            elif c is Instance:
                klass = reflect.namedObject(ao.klass)
                state = self.unjellyAO(ao.state)
                # Create the instance without running its __init__.
                inst = klass.__new__(klass)
                if hasattr(klass, "__setstate__"):
                    # Deferred so the state is complete when it is applied.
                    self.callAfter(inst.__setstate__, state)
                else:
                    inst.__dict__ = state
                return inst

            elif c is Ref:
                o = self.unjellyAO(ao.obj)  # THIS IS CHANGING THE REF OMG
                refkey = ao.refnum
                ref = self.references.get(refkey)
                if ref is None:
                    self.references[refkey] = o
                elif isinstance(ref, crefutil.NotKnown):
                    # A Deref was seen first; fill in everything waiting on it.
                    ref.resolveDependants(o)
                    self.references[refkey] = o
                elif refkey is None:
                    # This happens when you're unjellying from an AOT not read from source
                    pass
                else:
                    raise ValueError(
                        "Multiple references with the same ID: %s, %s, %s!"
                        % (ref, refkey, ao)
                    )
                return o

            elif c is Deref:
                num = ao.refnum
                ref = self.references.get(num)
                if ref is None:
                    # Target not seen yet: hand out a placeholder and remember
                    # it so the matching Ref can resolve it.
                    der = crefutil._Dereference(num)
                    self.references[num] = der
                    return der
                return ref

            elif c is Copyreg:
                loadfunc = reflect.namedObject(ao.loadfunc)
                # The loading function is applied to the unpacked state.
                d = self.unjellyLater(ao.state).addCallback(
                    lambda result, _l: _l(*result), loadfunc
                )
                return d
            else:
                raise TypeError("Unsupported AOT type: %s" % t)

    def unjelly(self, ao):
        """Unjelly a whole tree, run the calls queued with L{callAfter}, and
        return the root object.

        On any exception the visited abstract objects are logged and the
        exception is re-raised.
        """
        try:
            l = [None]
            self.unjellyInto(l, 0, ao)
            for func, v in self.afterUnjelly:
                func(v[0])
            return l[0]
        except BaseException:
            log.msg("Error jellying object! Stacktrace follows::")
            log.msg("\n".join(map(repr, self.stack)))
            raise
454
455
456#########
457# Jelly #
458#########
459
460
def jellyToAOT(obj):
    """
    Build the Abstract Object Tree for C{obj}.

    @param obj: the object to convert.
    @return: whatever a fresh L{AOTJellier} produces for C{obj}.
    """
    jellier = AOTJellier()
    return jellier.jelly(obj)
464
465
def jellyToSource(obj, file=None):
    """
    Convert an object to AOT source code.

    @param obj: the object to convert.
    @param file: optional file-like object.  When given, the source is
        written to it encoded as UTF-8, so it must accept C{bytes}.
    @return: the source text when no file was given, otherwise C{None}.
    """

    aot = jellyToAOT(obj)
    source = getSource(aot)
    # Identity test rather than truthiness: a file-like object that happens
    # to evaluate as false must still be written to.
    if file is not None:
        file.write(source.encode("utf-8"))
        return None
    return source
478
479
def _classOfMethod(methodObject):
    """
    Find the class of the object a bound method is bound to.

    @param methodObject: a bound method
    @type methodObject: L{types.MethodType}

    @return: the C{__class__} of the method's C{__self__}
    @rtype: L{type}
    """
    boundTo = methodObject.__self__
    return boundTo.__class__
491
492
def _funcOfMethod(methodObject):
    """
    Find the plain function underneath a bound method.

    @param methodObject: a bound method
    @type methodObject: L{types.MethodType}

    @return: the method's C{__func__}
    @rtype: L{types.FunctionType}
    """
    implementation = methodObject.__func__
    return implementation
504
505
def _selfOfMethod(methodObject):
    """
    Find the object a bound method is bound to.

    @param methodObject: a bound method
    @type methodObject: L{types.MethodType}

    @return: the method's C{__self__}
    @rtype: L{object}
    """
    boundTo = methodObject.__self__
    return boundTo
517
518
class AOTJellier:
    """
    I turn live objects into an Abstract Object Tree.
    """

    def __init__(self):
        # dict of {id(obj): Ref wrapping the abstract form of obj}
        self.prepared = {}
        # Strong references to every object registered in self.prepared.
        # The cache is keyed by id(), and an id is only unique while its
        # object is alive.  Without these references a temporary (such as
        # the value returned by __getstate__) could be freed and its id
        # handed to a later, unrelated object, which would then be mistaken
        # for a back-reference.
        self._preparedObjects = []
        # Last reference number handed out; numbering starts at 1.
        self._ref_id = 0
        # repr() of each object currently being jellied, for error reports.
        self.stack = []

    def prepareForRef(self, aoref, object):
        """I prepare an object for later referencing, by storing its id()
        and its _AORef in a cache."""
        self.prepared[id(object)] = aoref
        # Keep the object alive so its id cannot be reused (see __init__).
        self._preparedObjects.append(object)

    def jellyToAO(self, obj):
        """I turn an object into an AOT and return it.

        @raise TypeError: if the object is of a type that cannot be jellied.
        """
        objType = type(obj)
        self.stack.append(repr(obj))

        # immutable: We don't care if these have multiple refs!
        if objType in _SIMPLE_BUILTINS:
            retval = obj

        elif issubclass(objType, types.MethodType):
            # TODO: make methods 'prefer' not to jelly the object internally,
            # so that the object will show up where it's referenced first NOT
            # by a method.
            retval = InstanceMethod(
                _funcOfMethod(obj).__name__,
                reflect.qual(_classOfMethod(obj)),
                self.jellyToAO(_selfOfMethod(obj)),
            )

        elif issubclass(objType, types.ModuleType):
            retval = Module(obj.__name__)

        elif issubclass(objType, type):
            retval = Class(reflect.qual(obj))

        elif objType is types.FunctionType:
            retval = Function(reflect.fullFuncName(obj))

        else:  # mutable! gotta watch for refs.

            # Marmalade had the nicety of being able to just stick a 'reference' attribute
            # on any Node object that was referenced, but in AOT, the referenced object
            # is *inside* of a Ref call (Ref(num, obj) instead of
            # <objtype ... reference="1">). The problem is, especially for built-in types,
            # I can't just assign some attribute to them to give them a refnum. So, I have
            # to "wrap" a Ref(..) around them later -- that's why I put *everything* that's
            # mutable inside one. The Ref() class will only print the "Ref(..)" around an
            # object if it has a Reference explicitly attached.

            if id(obj) in self.prepared:
                oldRef = self.prepared[id(obj)]
                if oldRef.refnum:
                    # it's been referenced already
                    key = oldRef.refnum
                else:
                    # it hasn't been referenced yet
                    self._ref_id = self._ref_id + 1
                    key = self._ref_id
                    oldRef.setRef(key)
                # Balance the append at the top of this method; leaving the
                # entry behind would make every later pop remove the wrong
                # item and corrupt the trace logged by jelly() on error.
                del self.stack[-1]
                return Deref(key)

            retval = Ref()

            def _stateFrom(state):
                retval.setObj(
                    Instance(reflect.qual(obj.__class__), self.jellyToAO(state))
                )

            # Register before descending so that cycles back to obj are
            # emitted as a Deref instead of recursing forever.
            self.prepareForRef(retval, obj)

            if objType is list:
                retval.setObj([self.jellyToAO(o) for o in obj])  # hah!

            elif objType is tuple:
                retval.setObj(tuple(map(self.jellyToAO, obj)))

            elif objType is dict:
                d = {}
                for k, v in obj.items():
                    d[self.jellyToAO(k)] = self.jellyToAO(v)
                retval.setObj(d)

            elif objType in copy_reg.dispatch_table:
                unpickleFunc, state = copy_reg.dispatch_table[objType](obj)

                retval.setObj(
                    Copyreg(reflect.fullFuncName(unpickleFunc), self.jellyToAO(state))
                )

            elif hasattr(obj, "__getstate__"):
                _stateFrom(obj.__getstate__())
            elif hasattr(obj, "__dict__"):
                _stateFrom(obj.__dict__)
            else:
                raise TypeError("Unsupported type: %s" % objType.__name__)

        # Reached only on success: after an exception the stack is left as
        # it was so that jelly() can log where the failure happened.
        del self.stack[-1]
        return retval

    def jelly(self, obj):
        """Jelly C{obj} and return its abstract form.

        On any exception the stack of objects being jellied is logged and
        the exception is re-raised.
        """
        try:
            ao = self.jellyToAO(obj)
            return ao
        except BaseException:
            log.msg("Error jellying object! Stacktrace follows::")
            log.msg("\n".join(self.stack))
            raise
628