1#!/usr/bin/env python 2############################################################################## 3# 4# Copyright (c) 2001, 2002, 2003 Zope Foundation and Contributors. 5# All Rights Reserved. 6# 7# This software is subject to the provisions of the Zope Public License, 8# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. 9# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED 10# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 11# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS 12# FOR A PARTICULAR PURPOSE 13# 14############################################################################## 15"""A script to gather statistics while doing a storage migration. 16 17This is very similar to a standard storage's copyTransactionsFrom() method, 18except that it's geared to run as a script, and it collects useful pieces of 19information as it's working. This script can be used to stress test a storage 20since it blasts transactions at it as fast as possible. You can get a good 21sense of the performance of a storage by running this script. 22 23Actually it just counts the size of pickles in the transaction via the 24iterator protocol, so storage overheads aren't counted. 25 26Usage: %(PROGRAM)s [options] [source-storage-args] [destination-storage-args] 27Options: 28 -S sourcetype 29 --stype=sourcetype 30 This is the name of a recognized type for the source database. Use -T 31 to print out the known types. Defaults to "file". 32 33 -D desttype 34 --dtype=desttype 35 This is the name of the recognized type for the destination database. 36 Use -T to print out the known types. Defaults to "file". 37 38 -o filename 39 --output=filename 40 Print results in filename, otherwise stdout. 41 42 -m txncount 43 --max=txncount 44 Stop after committing txncount transactions. 45 46 -k txncount 47 --skip=txncount 48 Skip the first txncount transactions. 49 50 -p/--profile 51 Turn on specialized profiling. 52 53 -t/--timestamps 54 Print tids as timestamps. 55 56 -T/--storage_types 57 Print all the recognized storage types and exit. 58 59 -v/--verbose 60 Turns on verbose output. Multiple -v options increase the verbosity. 61 62 -h/--help 63 Print this message and exit. 64 65Positional arguments: 66 67 source-storage-args: 68 Semicolon separated list of arguments for the source storage, as 69 key=val pairs. E.g. "file_name=Data.fs;read_only=1" 70 71 destination-storage-args: 72 Comma separated list of arguments for the source storage, as key=val 73 pairs. E.g. "name=full;frequency=3600" 74""" 75from __future__ import print_function 76import re 77import sys 78import time 79import getopt 80import marshal 81import profile 82 83from persistent.timestamp import TimeStamp 84from ZODB import utils 85from ZODB import StorageTypes # XXX: This import does not exist 86 87 88PROGRAM = sys.argv[0] 89 90 91def usage(code, msg=''): 92 print(__doc__ % globals(), file=sys.stderr) 93 if msg: 94 print(msg, file=sys.stderr) 95 sys.exit(code) 96 97 98def error(code, msg): 99 print(msg, file=sys.stderr) 100 print("use --help for usage message") 101 sys.exit(code) 102 103 104def main(): 105 try: 106 opts, args = getopt.getopt( 107 sys.argv[1:], 108 'hvo:pm:k:D:S:Tt', 109 ['help', 'verbose', 110 'output=', 'profile', 'storage_types', 111 'max=', 'skip=', 'dtype=', 'stype=', 'timestamps']) 112 except getopt.error as msg: 113 error(2, msg) 114 115 class Options(object): 116 stype = 'FileStorage' 117 dtype = 'FileStorage' 118 verbose = 0 119 outfile = None 120 profilep = False 121 maxtxn = -1 122 skiptxn = -1 123 timestamps = False 124 125 options = Options() 126 127 for opt, arg in opts: 128 if opt in ('-h', '--help'): 129 usage(0) 130 elif opt in ('-v', '--verbose'): 131 options.verbose += 1 132 elif opt in ('-T', '--storage_types'): 133 print_types() 134 sys.exit(0) 135 elif opt in ('-S', '--stype'): 136 options.stype = arg 137 elif opt in ('-D', '--dtype'): 138 options.dtype = arg 139 elif opt in ('-o', '--output'): 140 options.outfile = arg 141 elif opt in ('-p', '--profile'): 142 options.profilep = True 143 elif opt in ('-m', '--max'): 144 options.maxtxn = int(arg) 145 elif opt in ('-k', '--skip'): 146 options.skiptxn = int(arg) 147 elif opt in ('-t', '--timestamps'): 148 options.timestamps = True 149 150 if len(args) > 2: 151 error(2, "too many arguments") 152 153 srckws = {} 154 if len(args) > 0: 155 srcargs = args[0] 156 for kv in re.split(r';\s*', srcargs): 157 key, val = kv.split('=') 158 srckws[key] = val 159 160 destkws = {} 161 if len(args) > 1: 162 destargs = args[1] 163 for kv in re.split(r';\s*', destargs): 164 key, val = kv.split('=') 165 destkws[key] = val 166 167 if options.stype not in StorageTypes.storage_types.keys(): 168 usage(2, 'Source database type must be provided') 169 if options.dtype not in StorageTypes.storage_types.keys(): 170 usage(2, 'Destination database type must be provided') 171 172 # Open the output file 173 if options.outfile is None: 174 options.outfp = sys.stdout 175 options.outclosep = False 176 else: 177 options.outfp = open(options.outfile, 'w') 178 options.outclosep = True 179 180 if options.verbose > 0: 181 print('Opening source database...') 182 modname, sconv = StorageTypes.storage_types[options.stype] 183 kw = sconv(**srckws) 184 __import__(modname) 185 sclass = getattr(sys.modules[modname], options.stype) 186 srcdb = sclass(**kw) 187 188 if options.verbose > 0: 189 print('Opening destination database...') 190 modname, dconv = StorageTypes.storage_types[options.dtype] 191 kw = dconv(**destkws) 192 __import__(modname) 193 dclass = getattr(sys.modules[modname], options.dtype) 194 dstdb = dclass(**kw) 195 196 try: 197 t0 = time.time() 198 doit(srcdb, dstdb, options) 199 t1 = time.time() 200 if options.verbose > 0: 201 print('Migration time: %8.3f' % (t1-t0)) 202 finally: 203 # Done 204 srcdb.close() 205 dstdb.close() 206 if options.outclosep: 207 options.outfp.close() 208 209 210def doit(srcdb, dstdb, options): 211 outfp = options.outfp 212 profilep = options.profilep 213 verbose = options.verbose 214 # some global information 215 largest_pickle = 0 216 largest_txn_in_size = 0 217 largest_txn_in_objects = 0 218 total_pickle_size = 0 219 total_object_count = 0 220 # Ripped from BaseStorage.copyTransactionsFrom() 221 ts = None 222 ok = True 223 prevrevids = {} 224 counter = 0 225 skipper = 0 226 if options.timestamps: 227 print("%4s. %26s %6s %8s %5s %5s %5s %5s %5s" % ( 228 "NUM", "TID AS TIMESTAMP", "OBJS", "BYTES", 229 # Does anybody know what these times mean? 230 "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")) 231 else: 232 print("%4s. %20s %6s %8s %6s %6s %6s %6s %6s" % ( 233 "NUM", "TRANSACTION ID", "OBJS", "BYTES", 234 # Does anybody know what these times mean? 235 "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")) 236 for txn in srcdb.iterator(): 237 skipper += 1 238 if skipper <= options.skiptxn: 239 continue 240 counter += 1 241 if counter > options.maxtxn >= 0: 242 break 243 tid = txn.tid 244 if ts is None: 245 ts = TimeStamp(tid) 246 else: 247 t = TimeStamp(tid) 248 if t <= ts: 249 if ok: 250 print(( 251 'Time stamps are out of order %s, %s' % (ts, t)), file=sys.stderr) 252 ok = False 253 ts = t.laterThan(ts) 254 tid = ts.raw() 255 else: 256 ts = t 257 if not ok: 258 print(( 259 'Time stamps are back in order %s' % t), file=sys.stderr) 260 ok = True 261 if verbose > 1: 262 print(ts) 263 264 prof = None 265 if profilep and (counter % 100) == 0: 266 prof = profile.Profile() 267 objects = 0 268 size = 0 269 newrevids = RevidAccumulator() 270 t0 = time.time() 271 dstdb.tpc_begin(txn, tid, txn.status) 272 t1 = time.time() 273 for r in txn: 274 oid = r.oid 275 objects += 1 276 thissize = len(r.data) 277 size += thissize 278 if thissize > largest_pickle: 279 largest_pickle = thissize 280 if verbose > 1: 281 if not r.version: 282 vstr = 'norev' 283 else: 284 vstr = r.version 285 print(utils.U64(oid), vstr, len(r.data)) 286 oldrevid = prevrevids.get(oid, utils.z64) 287 result = dstdb.store(oid, oldrevid, r.data, r.version, txn) 288 newrevids.store(oid, result) 289 t2 = time.time() 290 result = dstdb.tpc_vote(txn) 291 t3 = time.time() 292 newrevids.tpc_vote(result) 293 prevrevids.update(newrevids.get_dict()) 294 # Profile every 100 transactions 295 if prof: 296 prof.runcall(dstdb.tpc_finish, txn) 297 else: 298 dstdb.tpc_finish(txn) 299 t4 = time.time() 300 301 # record the results 302 if objects > largest_txn_in_objects: 303 largest_txn_in_objects = objects 304 if size > largest_txn_in_size: 305 largest_txn_in_size = size 306 if options.timestamps: 307 tidstr = str(TimeStamp(tid)) 308 format = "%4d. %26s %6d %8d %5.3f %5.3f %5.3f %5.3f %5.3f" 309 else: 310 tidstr = utils.U64(tid) 311 format = "%4d. %20s %6d %8d %6.4f %6.4f %6.4f %6.4f %6.4f" 312 print(format % (skipper, tidstr, objects, size, 313 t4-t0, t1-t0, t2-t1, t3-t2, t4-t3), file=outfp) 314 total_pickle_size += size 315 total_object_count += objects 316 317 if prof: 318 prof.create_stats() 319 fp = open('profile-%02d.txt' % (counter / 100), 'wb') 320 marshal.dump(prof.stats, fp) 321 fp.close() 322 print("Largest pickle: %8d" % largest_pickle, file=outfp) 323 print("Largest transaction: %8d" % largest_txn_in_size, file=outfp) 324 print("Largest object count: %8d" % largest_txn_in_objects, file=outfp) 325 print("Total pickle size: %14d" % total_pickle_size, file=outfp) 326 print("Total object count: %8d" % total_object_count, file=outfp) 327 328 329# helper to deal with differences between old-style store() return and 330# new-style store() return that supports ZEO 331 332class RevidAccumulator(object): 333 334 def __init__(self): 335 self.data = {} 336 337 def _update_from_list(self, list): 338 for oid, serial in list: 339 if not isinstance(serial, str): 340 raise serial 341 self.data[oid] = serial 342 343 def store(self, oid, result): 344 if isinstance(result, str): 345 self.data[oid] = result 346 elif result is not None: 347 self._update_from_list(result) 348 349 def tpc_vote(self, result): 350 if result is not None: 351 self._update_from_list(result) 352 353 def get_dict(self): 354 return self.data 355 356 357if __name__ == '__main__': 358 main() 359