1#!/usr/bin/env python
2##############################################################################
3#
4# Copyright (c) 2001, 2002, 2003 Zope Foundation and Contributors.
5# All Rights Reserved.
6#
7# This software is subject to the provisions of the Zope Public License,
8# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
9# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
10# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
11# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
12# FOR A PARTICULAR PURPOSE
13#
14##############################################################################
15"""A script to gather statistics while doing a storage migration.
16
17This is very similar to a standard storage's copyTransactionsFrom() method,
18except that it's geared to run as a script, and it collects useful pieces of
19information as it's working.  This script can be used to stress test a storage
20since it blasts transactions at it as fast as possible.  You can get a good
21sense of the performance of a storage by running this script.
22
23Actually it just counts the size of pickles in the transaction via the
24iterator protocol, so storage overheads aren't counted.
25
26Usage: %(PROGRAM)s [options] [source-storage-args] [destination-storage-args]
27Options:
28    -S sourcetype
29    --stype=sourcetype
30        This is the name of a recognized type for the source database.  Use -T
31        to print out the known types.  Defaults to "file".
32
33    -D desttype
34    --dtype=desttype
35        This is the name of the recognized type for the destination database.
36        Use -T to print out the known types.  Defaults to "file".
37
38    -o filename
39    --output=filename
40        Print results in filename, otherwise stdout.
41
42    -m txncount
43    --max=txncount
44        Stop after committing txncount transactions.
45
46    -k txncount
47    --skip=txncount
48        Skip the first txncount transactions.
49
50    -p/--profile
51        Turn on specialized profiling.
52
53    -t/--timestamps
54        Print tids as timestamps.
55
56    -T/--storage_types
57        Print all the recognized storage types and exit.
58
59    -v/--verbose
60        Turns on verbose output.  Multiple -v options increase the verbosity.
61
62    -h/--help
63        Print this message and exit.
64
65Positional arguments:
66
67    source-storage-args:
68        Semicolon separated list of arguments for the source storage, as
69        key=val pairs.  E.g. "file_name=Data.fs;read_only=1"
70
71    destination-storage-args:
72        Comma separated list of arguments for the source storage, as key=val
73        pairs.  E.g. "name=full;frequency=3600"
74"""
75from __future__ import print_function
76import re
77import sys
78import time
79import getopt
80import marshal
81import profile
82
83from persistent.timestamp import TimeStamp
84from ZODB import utils
85from ZODB import StorageTypes # XXX: This import does not exist
86
87
88PROGRAM = sys.argv[0]
89
90
91def usage(code, msg=''):
92    print(__doc__ % globals(), file=sys.stderr)
93    if msg:
94        print(msg, file=sys.stderr)
95    sys.exit(code)
96
97
98def error(code, msg):
99    print(msg, file=sys.stderr)
100    print("use --help for usage message")
101    sys.exit(code)
102
103
104def main():
105    try:
106        opts, args = getopt.getopt(
107            sys.argv[1:],
108            'hvo:pm:k:D:S:Tt',
109            ['help', 'verbose',
110             'output=', 'profile', 'storage_types',
111             'max=', 'skip=', 'dtype=', 'stype=', 'timestamps'])
112    except getopt.error as msg:
113        error(2, msg)
114
115    class Options(object):
116        stype = 'FileStorage'
117        dtype = 'FileStorage'
118        verbose = 0
119        outfile = None
120        profilep = False
121        maxtxn = -1
122        skiptxn = -1
123        timestamps = False
124
125    options = Options()
126
127    for opt, arg in opts:
128        if opt in ('-h', '--help'):
129            usage(0)
130        elif opt in ('-v', '--verbose'):
131            options.verbose += 1
132        elif opt in ('-T', '--storage_types'):
133            print_types()
134            sys.exit(0)
135        elif opt in ('-S', '--stype'):
136            options.stype = arg
137        elif opt in ('-D', '--dtype'):
138            options.dtype = arg
139        elif opt in ('-o', '--output'):
140            options.outfile = arg
141        elif opt in ('-p', '--profile'):
142            options.profilep = True
143        elif opt in ('-m', '--max'):
144            options.maxtxn = int(arg)
145        elif opt in ('-k', '--skip'):
146            options.skiptxn = int(arg)
147        elif opt in ('-t', '--timestamps'):
148            options.timestamps = True
149
150    if len(args) > 2:
151        error(2, "too many arguments")
152
153    srckws = {}
154    if len(args) > 0:
155        srcargs = args[0]
156        for kv in re.split(r';\s*', srcargs):
157            key, val = kv.split('=')
158            srckws[key] = val
159
160    destkws = {}
161    if len(args) > 1:
162        destargs = args[1]
163        for kv in re.split(r';\s*', destargs):
164            key, val = kv.split('=')
165            destkws[key] = val
166
167    if options.stype not in StorageTypes.storage_types.keys():
168        usage(2, 'Source database type must be provided')
169    if options.dtype not in StorageTypes.storage_types.keys():
170        usage(2, 'Destination database type must be provided')
171
172    # Open the output file
173    if options.outfile is None:
174        options.outfp = sys.stdout
175        options.outclosep = False
176    else:
177        options.outfp = open(options.outfile, 'w')
178        options.outclosep = True
179
180    if options.verbose > 0:
181        print('Opening source database...')
182    modname, sconv = StorageTypes.storage_types[options.stype]
183    kw = sconv(**srckws)
184    __import__(modname)
185    sclass = getattr(sys.modules[modname], options.stype)
186    srcdb = sclass(**kw)
187
188    if options.verbose > 0:
189        print('Opening destination database...')
190    modname, dconv = StorageTypes.storage_types[options.dtype]
191    kw = dconv(**destkws)
192    __import__(modname)
193    dclass = getattr(sys.modules[modname], options.dtype)
194    dstdb = dclass(**kw)
195
196    try:
197        t0 = time.time()
198        doit(srcdb, dstdb, options)
199        t1 = time.time()
200        if options.verbose > 0:
201            print('Migration time:          %8.3f' % (t1-t0))
202    finally:
203        # Done
204        srcdb.close()
205        dstdb.close()
206        if options.outclosep:
207            options.outfp.close()
208
209
210def doit(srcdb, dstdb, options):
211    outfp = options.outfp
212    profilep = options.profilep
213    verbose = options.verbose
214    # some global information
215    largest_pickle = 0
216    largest_txn_in_size = 0
217    largest_txn_in_objects = 0
218    total_pickle_size = 0
219    total_object_count = 0
220    # Ripped from BaseStorage.copyTransactionsFrom()
221    ts = None
222    ok = True
223    prevrevids = {}
224    counter = 0
225    skipper = 0
226    if options.timestamps:
227        print("%4s. %26s %6s %8s %5s %5s %5s %5s %5s" % (
228            "NUM", "TID AS TIMESTAMP", "OBJS", "BYTES",
229            # Does anybody know what these times mean?
230            "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3"))
231    else:
232        print("%4s. %20s %6s %8s %6s %6s %6s %6s %6s" % (
233            "NUM", "TRANSACTION ID", "OBJS", "BYTES",
234            # Does anybody know what these times mean?
235            "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3"))
236    for txn in srcdb.iterator():
237        skipper += 1
238        if skipper <= options.skiptxn:
239            continue
240        counter += 1
241        if counter > options.maxtxn >= 0:
242            break
243        tid = txn.tid
244        if ts is None:
245            ts = TimeStamp(tid)
246        else:
247            t = TimeStamp(tid)
248            if t <= ts:
249                if ok:
250                    print((
251                        'Time stamps are out of order %s, %s' % (ts, t)), file=sys.stderr)
252                    ok = False
253                    ts = t.laterThan(ts)
254                    tid = ts.raw()
255                else:
256                    ts = t
257                    if not ok:
258                        print((
259                            'Time stamps are back in order %s' % t), file=sys.stderr)
260                        ok = True
261        if verbose > 1:
262            print(ts)
263
264        prof = None
265        if profilep and (counter % 100) == 0:
266            prof = profile.Profile()
267        objects = 0
268        size = 0
269        newrevids = RevidAccumulator()
270        t0 = time.time()
271        dstdb.tpc_begin(txn, tid, txn.status)
272        t1 = time.time()
273        for r in txn:
274            oid = r.oid
275            objects += 1
276            thissize = len(r.data)
277            size += thissize
278            if thissize > largest_pickle:
279                largest_pickle = thissize
280            if verbose > 1:
281                if not r.version:
282                    vstr = 'norev'
283                else:
284                    vstr = r.version
285                print(utils.U64(oid), vstr, len(r.data))
286            oldrevid = prevrevids.get(oid, utils.z64)
287            result = dstdb.store(oid, oldrevid, r.data, r.version, txn)
288            newrevids.store(oid, result)
289        t2 = time.time()
290        result = dstdb.tpc_vote(txn)
291        t3 = time.time()
292        newrevids.tpc_vote(result)
293        prevrevids.update(newrevids.get_dict())
294        # Profile every 100 transactions
295        if prof:
296            prof.runcall(dstdb.tpc_finish, txn)
297        else:
298            dstdb.tpc_finish(txn)
299        t4 = time.time()
300
301        # record the results
302        if objects > largest_txn_in_objects:
303            largest_txn_in_objects = objects
304        if size > largest_txn_in_size:
305            largest_txn_in_size = size
306        if options.timestamps:
307            tidstr = str(TimeStamp(tid))
308            format = "%4d. %26s %6d %8d %5.3f %5.3f %5.3f %5.3f %5.3f"
309        else:
310            tidstr = utils.U64(tid)
311            format = "%4d. %20s %6d %8d %6.4f %6.4f %6.4f %6.4f %6.4f"
312        print(format % (skipper, tidstr, objects, size,
313                                  t4-t0, t1-t0, t2-t1, t3-t2, t4-t3), file=outfp)
314        total_pickle_size += size
315        total_object_count += objects
316
317        if prof:
318            prof.create_stats()
319            fp = open('profile-%02d.txt' % (counter / 100), 'wb')
320            marshal.dump(prof.stats, fp)
321            fp.close()
322    print("Largest pickle:          %8d" % largest_pickle, file=outfp)
323    print("Largest transaction:     %8d" % largest_txn_in_size, file=outfp)
324    print("Largest object count:    %8d" % largest_txn_in_objects, file=outfp)
325    print("Total pickle size: %14d" % total_pickle_size, file=outfp)
326    print("Total object count:      %8d" % total_object_count, file=outfp)
327
328
329# helper to deal with differences between old-style store() return and
330# new-style store() return that supports ZEO
331
332class RevidAccumulator(object):
333
334    def __init__(self):
335        self.data = {}
336
337    def _update_from_list(self, list):
338        for oid, serial in list:
339            if not isinstance(serial, str):
340                raise serial
341            self.data[oid] = serial
342
343    def store(self, oid, result):
344        if isinstance(result, str):
345            self.data[oid] = result
346        elif result is not None:
347            self._update_from_list(result)
348
349    def tpc_vote(self, result):
350        if result is not None:
351            self._update_from_list(result)
352
353    def get_dict(self):
354        return self.data
355
356
357if __name__ == '__main__':
358    main()
359