#!/usr/bin/env python

"""This script allows to create arbitrarily large files with the desired
combination of groups, tables per group and rows per table.

Issue "python stress-test3.py" without parameters for a help on usage.

"""

from __future__ import print_function
import gc
import sys
import time
from tables import IsDescription, Int32Col, StringCol, Filters, open_file


class Test(IsDescription):
    """Row description used by the stress test tables."""
    ngroup = Int32Col(pos=1)
    ntable = Int32Col(pos=2)
    nrow = Int32Col(pos=3)
    # A 500-byte string column to inflate the row size for the stress test.
    string = StringCol(500, pos=4)


def createFileArr(filename, ngroups, ntables, nrows):
    """Create `ngroups` empty groups hanging from "/" (Array flavour).

    No rows are actually written in this flavour, so the returned tuple is
    always (0 rows written, nominal row size of 4 bytes).
    """
    # First, create the groups

    # Open a file in "w"rite mode
    fileh = open_file(filename, mode="w", title="PyTables Stress Test")

    for k in range(ngroups):
        # Create the group
        fileh.create_group("/", 'group%04d' % k, "Group %d" % k)

    fileh.close()

    return (0, 4)


def readFileArr(filename, ngroups, recsize, verbose):
    """Read back every Array node of every group.

    Returns (rowsread, rowsize, buffersize); rowsize is the nominal 4 bytes
    and buffersize is not tracked for arrays (always 0).  `recsize` is
    accepted for signature parity with readFile() but is unused here.
    """
    rowsread = 0
    for ngroup in range(ngroups):
        # Mount each group as the root of the file view (root_uep).
        fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup)
        # Get the group
        group = fileh.root
        ntable = 0
        if verbose:
            print("Group ==>", group)
        for table in fileh.list_nodes(group, 'Array'):
            if verbose > 1:
                print("Array ==>", table)
                print("Rows in", table._v_pathname, ":", table.shape)

            arr = table.read()

            rowsread += len(arr)
            ntable += 1

        # Close the file (eventually destroy the extended type)
        fileh.close()

    return (rowsread, 4, 0)


def createFile(filename, ngroups, ntables, nrows, complevel, complib, recsize):
    """Create `ngroups` groups, each with `ntables` tables of `nrows` rows.

    Returns (rowswritten, rowsize).  `recsize` is accepted for signature
    compatibility with the command-line driver but is not used here.
    """
    # First, create the groups

    # Open a file in "w"rite mode
    fileh = open_file(filename, mode="w", title="PyTables Stress Test")

    for k in range(ngroups):
        # Create the group
        group = fileh.create_group("/", 'group%04d' % k, "Group %d" % k)

    fileh.close()

    # Now, create the tables
    rowswritten = 0
    # Initialize so the return value is defined even when ntables == 0
    # (previously this raised UnboundLocalError in that case).
    rowsize = 0
    for k in range(ngroups):
        fileh = open_file(filename, mode="a", root_uep='group%04d' % k)
        # Get the group
        group = fileh.root
        for j in range(ntables):
            # Create a table
            table = fileh.create_table(group, 'table%04d' % j, Test,
                                       'Table%04d' % j,
                                       Filters(complevel, complib), nrows)
            rowsize = table.rowsize
            # Get the row object associated with the new table
            row = table.row
            # Fill the table (the 'string' column keeps its default value)
            for i in range(nrows):
                row['ngroup'] = k
                row['ntable'] = j
                row['nrow'] = i
                row.append()

            rowswritten += nrows
            table.flush()

        # Close the file
        fileh.close()

    return (rowswritten, rowsize)


def readFile(filename, ngroups, recsize, verbose):
    """Read back every Table of every group, verifying row contents.

    Returns (rowsread, rowsize, buffersize).  Mismatching rows are reported
    on stdout; an AssertionError is raised if a table yields fewer rows
    than it claims to hold.
    """
    # Open the HDF5 file in read-only mode

    rowsread = 0
    # Defined up-front so the return value exists even for empty files.
    rowsize = 0
    buffersize = 0
    for ngroup in range(ngroups):
        fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup)
        # Get the group
        group = fileh.root
        ntable = 0
        if verbose:
            print("Group ==>", group)
        for table in fileh.list_nodes(group, 'Table'):
            rowsize = table.rowsize
            buffersize = table.rowsize * table.nrowsinbuf
            if verbose > 1:
                print("Table ==>", table)
                print("Max rows in buf:", table.nrowsinbuf)
                print("Rows in", table._v_pathname, ":", table.nrows)
                print("Buffersize:", table.rowsize * table.nrowsinbuf)
                print("MaxTuples:", table.nrowsinbuf)

            nrow = 0
            for row in table:
                try:
                    assert row["ngroup"] == ngroup
                    assert row["ntable"] == ntable
                    assert row["nrow"] == nrow
                except AssertionError:
                    # Report the bad record but keep scanning the table.
                    print("Error in group: %d, table: %d, row: %d" %
                          (ngroup, ntable, nrow))
                    print("Record ==>", row)
                nrow += 1

            assert nrow == table.nrows
            rowsread += table.nrows
            ntable += 1

        # Close the file (eventually destroy the extended type)
        fileh.close()

    return (rowsread, rowsize, buffersize)


def dump_garbage():
    """Show us what the garbage is about."""
    # Force collection
    print("\nGARBAGE:")
    gc.collect()

    print("\nGARBAGE OBJECTS:")
    for x in gc.garbage:
        s = str(x)
        # if len(s) > 80: s = s[:77] + "..."
        print(type(x), "\n  ", s)


if __name__ == "__main__":
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except ImportError:
        psyco_imported = 0

    usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file
    -d debugging level
    -v verbosity level
    -p use "psyco" if available
    -a use Array objects instead of Table
    -r only read test
    -w only write test
    -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2")
    -c sets a compression level (do not set it or 0 for no compression)
    -g number of groups hanging from "/"
    -t number of tables per group
    -i number of rows per table
"""

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'd:v:parwl:c:g:t:i:')
    except getopt.GetoptError:
        sys.stderr.write(usage % sys.argv[0])
        sys.exit(0)

    # if we pass too much parameters, abort
    if len(pargs) != 1:
        sys.stderr.write(usage % sys.argv[0])
        sys.exit(0)

    # default options
    ngroups = 5
    ntables = 5
    nrows = 100
    verbose = 0
    debug = 0
    recsize = "medium"
    testread = 1
    testwrite = 1
    usepsyco = 0
    usearray = 0
    complevel = 0
    complib = "zlib"

    # Get the options (flags are mutually exclusive per iteration, so a
    # single if/elif chain dispatches each one exactly once)
    for option in opts:
        if option[0] == '-d':
            debug = int(option[1])
        elif option[0] == '-v':
            verbose = int(option[1])
        elif option[0] == '-p':
            usepsyco = 1
        elif option[0] == '-a':
            usearray = 1
        elif option[0] == '-r':
            testwrite = 0
        elif option[0] == '-w':
            testread = 0
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-c':
            complevel = int(option[1])
        elif option[0] == '-g':
            ngroups = int(option[1])
        elif option[0] == '-t':
            ntables = int(option[1])
        elif option[0] == '-i':
            nrows = int(option[1])

    if debug:
        gc.enable()
        gc.set_debug(gc.DEBUG_LEAK)

    # Catch the hdf5 file passed as the last argument
    # (renamed from `file` to avoid shadowing the builtin)
    hdf5file = pargs[0]

    print("Compression level:", complevel)
    if complevel > 0:
        print("Compression library:", complib)
    if testwrite:
        t1 = time.time()
        cpu1 = time.perf_counter()
        if psyco_imported and usepsyco:
            psyco.bind(createFile)
        if usearray:
            (rowsw, rowsz) = createFileArr(hdf5file, ngroups, ntables, nrows)
        else:
            (rowsw, rowsz) = createFile(hdf5file, ngroups, ntables, nrows,
                                        complevel, complib, recsize)
        t2 = time.time()
        cpu2 = time.perf_counter()
        tapprows = round(t2 - t1, 3)
        cpuapprows = round(cpu2 - cpu1, 3)
        # NOTE(review): these divisions can raise ZeroDivisionError when the
        # run finishes in < 1 ms; kept as-is to preserve reported numbers.
        tpercent = int(round(cpuapprows / tapprows, 2) * 100)
        print("Rows written:", rowsw, " Row size:", rowsz)
        print("Time writing rows: %s s (real) %s s (cpu)  %s%%" %
              (tapprows, cpuapprows, tpercent))
        print("Write rows/sec: ", int(rowsw / float(tapprows)))
        print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024)))

    if testread:
        t1 = time.time()
        cpu1 = time.perf_counter()
        if psyco_imported and usepsyco:
            psyco.bind(readFile)
        if usearray:
            (rowsr, rowsz, bufsz) = readFileArr(hdf5file,
                                                ngroups, recsize, verbose)
        else:
            (rowsr, rowsz, bufsz) = readFile(hdf5file, ngroups, recsize,
                                             verbose)
        t2 = time.time()
        cpu2 = time.perf_counter()
        treadrows = round(t2 - t1, 3)
        cpureadrows = round(cpu2 - cpu1, 3)
        tpercent = int(round(cpureadrows / treadrows, 2) * 100)
        print("Rows read:", rowsr, " Row size:", rowsz, "Buf size:", bufsz)
        print("Time reading rows: %s s (real) %s s (cpu)  %s%%" %
              (treadrows, cpureadrows, tpercent))
        print("Read rows/sec: ", int(rowsr / float(treadrows)))
        print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)))

    # Show the dirt
    if debug > 1:
        dump_garbage()