1#!/usr/bin/env python
2##############################################################################
3#
4# Copyright (c) 2002 Zope Foundation and Contributors.
5# All Rights Reserved.
6#
7# This software is subject to the provisions of the Zope Public License,
8# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
9# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
10# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
11# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
12# FOR A PARTICULAR PURPOSE
13#
14##############################################################################
15
16"""Check FileStorage for dangling references.
17
18usage: fsrefs.py [-v] data.fs
19
20fsrefs.py checks object sanity by trying to load the current revision of
21every object O in the database, and also verifies that every object
22directly reachable from each such O exists in the database.
23
24It's hard to explain exactly what it does because it relies on undocumented
25features in Python's cPickle module:  many of the crucial steps of loading
26an object are taken, but application objects aren't actually created.  This
27saves a lot of time, and allows fsrefs to be run even if the code
28implementing the object classes isn't available.
29
30A read-only connection to the specified FileStorage is made, but it is not
31recommended to run fsrefs against a live FileStorage.  Because a live
32FileStorage is mutating while fsrefs runs, it's not possible for fsrefs to
33get a wholly consistent view of the database across the entire time fsrefs
34is running; spurious error messages may result.
35
36fsrefs doesn't normally produce any output.  If an object fails to load, the
37oid of the object is given in a message saying so, and if -v was specified
38then the traceback corresponding to the load failure is also displayed
39(this is the only effect of the -v flag).
40
41Three other kinds of errors are also detected, when an object O loads OK,
42and directly refers to a persistent object P but there's a problem with P:
43
44 - If P doesn't exist in the database, a message saying so is displayed.
   The unsatisfiable reference to P is often called a "dangling
46   reference"; P is called "missing" in the error output.
47
48 - If the current state of the database is such that P's creation has
49   been undone, then P can't be loaded either.  This is also a kind of
50   dangling reference, but is identified as "object creation was undone".
51
52 - If P can't be loaded (but does exist in the database), a message saying
53   that O refers to an object that can't be loaded is displayed.
54
55fsrefs also (indirectly) checks that the .index file is sane, because
56fsrefs uses the index to get its idea of what constitutes "all the objects
57in the database".
58
59Note these limitations:  because fsrefs only looks at the current revision
60of objects, it does not attempt to load objects in versions, or non-current
61revisions of objects; therefore fsrefs cannot find problems in versions or
62in non-current revisions.
63"""
64from __future__ import print_function
65import traceback
66
67from ZODB.FileStorage import FileStorage
68from ZODB.TimeStamp import TimeStamp
69from ZODB.utils import u64, oid_repr, get_pickle_metadata, load_current
70from ZODB.serialize import get_refs
71from ZODB.POSException import POSKeyError
72
def report(oid, data, serial, missing):
    """Print a description of the invalid references held by one object.

    'oid' identifies the referring object, 'data' is its pickle, and
    'serial' is its serial number.  'missing' is a list of
    (oid, class, reason) triples, one per problematic reference; the
    class entry is rendered as "module.name" when it is a tuple and
    with str() otherwise.
    """
    from_mod, from_class = get_pickle_metadata(data)
    suffix = "s" if len(missing) > 1 else ""
    stamp = TimeStamp(serial)

    # Header: the referring object and when it was last written.
    print("oid %s %s.%s" % (hex(u64(oid)), from_mod, from_class))
    print("last updated: %s, tid=%s" % (stamp, hex(u64(serial))))
    print("refers to invalid object%s:" % suffix)

    # One tab-indented line per invalid reference.
    for ref_oid, info, reason in missing:
        description = ("%s.%s" % info) if isinstance(info, tuple) else str(info)
        print("\toid %s %s: %r" % (oid_repr(ref_oid), reason, description))

    # A blank line separates consecutive reports.
    print()
93
def main(path=None):
    """Check the FileStorage at 'path' for dangling references.

    When 'path' is None the command line is parsed instead: every -v
    option raises the verbosity, and the single remaining argument is
    used as the path of the storage file.

    The storage is opened read-only and its index is walked twice.  The
    first pass tries to load the current revision of every oid in the
    index and sorts the failures into two sets: oids that raised
    POSKeyError ("object creation was undone") and oids that raised any
    other exception ("failed to load").  For the latter a message naming
    the oid is printed, followed by the traceback when verbose.  The
    second pass inspects the references of every object that did load
    and calls report() for each object holding a reference to an oid
    that is absent from the index or is in one of the two failure sets.

    The storage is always closed before returning, even on error.

    Raises getopt.GetoptError for an unrecognized command-line option,
    and ValueError when the command line does not contain exactly one
    path argument.
    """
    verbose = 0
    if path is None:
        import sys
        import getopt

        opts, args = getopt.getopt(sys.argv[1:], "v")
        for opt, _ in opts:
            if opt == "-v":
                verbose += 1

        # Exactly one positional argument (the storage path) is expected.
        path, = args

    fs = FileStorage(path, read_only=1)
    try:
        # Set of oids in the index that failed to load due to POSKeyError.
        # This is what happens if undo is applied to the transaction
        # creating the object (the oid is still in the index, but its
        # current data record has a backpointer of 0, and POSKeyError is
        # raised then because of that backpointer).
        undone = set()

        # Set of oids that were present in the index but failed to load.
        # This does not include oids in undone.
        noload = set()

        for oid in fs._index.keys():
            try:
                load_current(fs, oid)
            except POSKeyError:
                undone.add(oid)
            except Exception:
                # KeyboardInterrupt and SystemExit do not derive from
                # Exception, so they still propagate and stop the scan.
                print("oid %s failed to load" % hex(u64(oid)))
                if verbose:
                    traceback.print_exc()
                noload.add(oid)

        # Objects that could not be loaded cannot be inspected for refs.
        inactive = noload | undone
        for oid in fs._index.keys():
            if oid in inactive:
                continue
            data, serial = load_current(fs, oid)
            refs = get_refs(data)
            missing = []  # contains 3-tuples of oid, klass-metadata, reason
            for ref, klass in refs:
                if klass is None:
                    klass = '<unknown>'
                if ref not in fs._index:
                    missing.append((ref, klass, "missing"))
                if ref in noload:
                    missing.append((ref, klass, "failed to load"))
                if ref in undone:
                    missing.append((ref, klass, "object creation was undone"))
            if missing:
                report(oid, data, serial, missing)
    finally:
        # Release the storage's file handles no matter how the scan ended.
        fs.close()
152
# When executed as a script, run the check; main() is called without a
# path, so it takes its options and the storage path from sys.argv.
if __name__ == "__main__":
    main()
155