1#!/usr/bin/env python
2
3"""This script allows to create arbitrarily large files with the desired
4combination of groups, tables per group and rows per table.
5
6Issue "python stress-test3.py" without parameters for a help on usage.
7
8"""
9
10from __future__ import print_function
11import gc
12import sys
13import time
14from tables import *
15
16
class Test(IsDescription):
    """Row description for the stress-test tables.

    The ``pos`` arguments pin the on-disk column order explicitly.
    """
    ngroup = Int32Col(pos=1)  # index of the enclosing group
    ntable = Int32Col(pos=2)  # index of the table within its group
    nrow = Int32Col(pos=3)  # row number within the table
    string = StringCol(500, pos=4)  # 500-byte payload; left at its default by the writer
22
23
def createFileArr(filename, ngroups, ntables, nrows):
    """Create *ngroups* empty groups under the root of *filename*.

    NOTE(review): despite the name and the ``ntables``/``nrows``
    parameters, this variant only builds the group hierarchy — no Array
    nodes are written, so ``readFileArr`` will find nothing to read.

    Returns a ``(rows_written, row_size)`` tuple, always ``(0, 4)``.
    """
    # Open a file in "w"rite mode and lay down the groups.
    fileh = open_file(filename, mode="w", title="PyTables Stress Test")
    for igroup in range(ngroups):
        fileh.create_group("/", 'group%04d' % igroup, "Group %d" % igroup)
    fileh.close()

    # Nothing was written; the nominal row size is 4 bytes.
    return (0, 4)
38
39
def readFileArr(filename, ngroups, recsize, verbose):
    """Read every Array node under each group and count the rows read.

    ``recsize`` is accepted only for signature parity with ``readFile``;
    it is not used.  Returns ``(rows_read, 4, 0)``.
    """
    rowsread = 0
    for igroup in range(ngroups):
        # Re-open the file for each group, rooted at that group.
        fileh = open_file(filename, mode="r", root_uep='group%04d' % igroup)
        root = fileh.root
        if verbose:
            print("Group ==>", root)
        narray = 0
        for node in fileh.list_nodes(root, 'Array'):
            if verbose > 1:
                print("Array ==>", node)
                print("Rows in", node._v_pathname, ":", node.shape)
            rowsread += len(node.read())
            narray += 1

        # Close the file (eventually destroy the extended type)
        fileh.close()

    return (rowsread, 4, 0)
64
65
def createFile(filename, ngroups, ntables, nrows, complevel, complib, recsize):
    """Create *ngroups* groups, each with *ntables* tables of *nrows* rows.

    The file is written in two passes: the group hierarchy first, then the
    tables (re-opening the file rooted at each group in turn).  The
    ``string`` column is never assigned and keeps its default value.
    ``recsize`` is unused.

    Returns ``(rows_written, row_size)``.
    """
    # First pass: create the groups in "w"rite mode.
    fileh = open_file(filename, mode="w", title="PyTables Stress Test")
    for k in range(ngroups):
        fileh.create_group("/", 'group%04d' % k, "Group %d" % k)
    fileh.close()

    # Second pass: create and fill the tables.
    rowswritten = 0
    # Initialize so the return does not raise NameError when ngroups or
    # ntables is 0 (previously `rowsize` was only bound inside the loops).
    rowsize = 0
    for k in range(ngroups):
        fileh = open_file(filename, mode="a", root_uep='group%04d' % k)
        # Get the group
        group = fileh.root
        for j in range(ntables):
            # Create a table
            table = fileh.create_table(group, 'table%04d' % j, Test,
                                       'Table%04d' % j,
                                       Filters(complevel, complib), nrows)
            rowsize = table.rowsize
            # Get the row object associated with the new table
            row = table.row
            # Fill the table
            for i in range(nrows):
                row['ngroup'] = k
                row['ntable'] = j
                row['nrow'] = i
                row.append()

            rowswritten += nrows
            table.flush()

        # Close the file
        fileh.close()

    return (rowswritten, rowsize)
107
108
def readFile(filename, ngroups, recsize, verbose):
    """Read back and verify every table written by ``createFile``.

    Each row's ``ngroup``/``ntable``/``nrow`` fields are checked against
    their expected values; mismatches are reported but do not abort the
    scan.  ``recsize`` is unused.

    Returns ``(rows_read, row_size, buffer_size)``.
    """
    rowsread = 0
    # Initialize so the return does not raise NameError when there are no
    # groups or no tables (previously these were only bound in the loop).
    rowsize = 0
    buffersize = 0
    for ngroup in range(ngroups):
        # Open the HDF5 file in read-only mode, rooted at this group.
        fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup)
        # Get the group
        group = fileh.root
        ntable = 0
        if verbose:
            print("Group ==>", group)
        for table in fileh.list_nodes(group, 'Table'):
            rowsize = table.rowsize
            buffersize = table.rowsize * table.nrowsinbuf
            if verbose > 1:
                print("Table ==>", table)
                print("Max rows in buf:", table.nrowsinbuf)
                print("Rows in", table._v_pathname, ":", table.nrows)
                print("Buffersize:", table.rowsize * table.nrowsinbuf)
                print("MaxTuples:", table.nrowsinbuf)

            nrow = 0
            for row in table:
                try:
                    # NOTE: assert statements are stripped under -O; kept to
                    # preserve the original checking style.
                    assert row["ngroup"] == ngroup
                    assert row["ntable"] == ntable
                    assert row["nrow"] == nrow
                except AssertionError:
                    # Narrowed from a bare `except:` that also swallowed
                    # KeyboardInterrupt/SystemExit.
                    print("Error in group: %d, table: %d, row: %d" %
                          (ngroup, ntable, nrow))
                    print("Record ==>", row)
                nrow += 1

            assert nrow == table.nrows
            rowsread += table.nrows
            ntable += 1

        # Close the file (eventually destroy the extended type)
        fileh.close()

    return (rowsread, rowsize, buffersize)
150
151
def dump_garbage():
    """Force a collection and print what the garbage collector found."""
    print("\nGARBAGE:")
    gc.collect()

    print("\nGARBAGE OBJECTS:")
    for obj in gc.garbage:
        text = str(obj)
        # Uncomment to truncate very long reprs:
        # if len(text) > 80: text = text[:77] + "..."
        print(type(obj), "\n   ", text)
163
164if __name__ == "__main__":
165    import getopt
166    try:
167        import psyco
168        psyco_imported = 1
169    except:
170        psyco_imported = 0
171
172    usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file
173    -d debugging level
174    -v verbosity level
175    -p use "psyco" if available
176    -a use Array objects instead of Table
177    -r only read test
178    -w only write test
179    -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2")
180    -c sets a compression level (do not set it or 0 for no compression)
181    -g number of groups hanging from "/"
182    -t number of tables per group
183    -i number of rows per table
184"""
185
186    try:
187        opts, pargs = getopt.getopt(sys.argv[1:], 'd:v:parwl:c:g:t:i:')
188    except:
189        sys.stderr.write(usage)
190        sys.exit(0)
191
192    # if we pass too much parameters, abort
193    if len(pargs) != 1:
194        sys.stderr.write(usage)
195        sys.exit(0)
196
197    # default options
198    ngroups = 5
199    ntables = 5
200    nrows = 100
201    verbose = 0
202    debug = 0
203    recsize = "medium"
204    testread = 1
205    testwrite = 1
206    usepsyco = 0
207    usearray = 0
208    complevel = 0
209    complib = "zlib"
210
211    # Get the options
212    for option in opts:
213        if option[0] == '-d':
214            debug = int(option[1])
215        if option[0] == '-v':
216            verbose = int(option[1])
217        if option[0] == '-p':
218            usepsyco = 1
219        if option[0] == '-a':
220            usearray = 1
221        elif option[0] == '-r':
222            testwrite = 0
223        elif option[0] == '-w':
224            testread = 0
225        elif option[0] == '-l':
226            complib = option[1]
227        elif option[0] == '-c':
228            complevel = int(option[1])
229        elif option[0] == '-g':
230            ngroups = int(option[1])
231        elif option[0] == '-t':
232            ntables = int(option[1])
233        elif option[0] == '-i':
234            nrows = int(option[1])
235
236    if debug:
237        gc.enable()
238        gc.set_debug(gc.DEBUG_LEAK)
239
240    # Catch the hdf5 file passed as the last argument
241    file = pargs[0]
242
243    print("Compression level:", complevel)
244    if complevel > 0:
245        print("Compression library:", complib)
246    if testwrite:
247        t1 = time.time()
248        cpu1 = time.perf_counter()
249        if psyco_imported and usepsyco:
250            psyco.bind(createFile)
251        if usearray:
252            (rowsw, rowsz) = createFileArr(file, ngroups, ntables, nrows)
253        else:
254            (rowsw, rowsz) = createFile(file, ngroups, ntables, nrows,
255                                        complevel, complib, recsize)
256        t2 = time.time()
257        cpu2 = time.perf_counter()
258        tapprows = round(t2 - t1, 3)
259        cpuapprows = round(cpu2 - cpu1, 3)
260        tpercent = int(round(cpuapprows / tapprows, 2) * 100)
261        print("Rows written:", rowsw, " Row size:", rowsz)
262        print("Time writing rows: %s s (real) %s s (cpu)  %s%%" %
263              (tapprows, cpuapprows, tpercent))
264        print("Write rows/sec: ", int(rowsw / float(tapprows)))
265        print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024)))
266
267    if testread:
268        t1 = time.time()
269        cpu1 = time.perf_counter()
270        if psyco_imported and usepsyco:
271            psyco.bind(readFile)
272        if usearray:
273            (rowsr, rowsz, bufsz) = readFileArr(file,
274                                                ngroups, recsize, verbose)
275        else:
276            (rowsr, rowsz, bufsz) = readFile(file, ngroups, recsize, verbose)
277        t2 = time.time()
278        cpu2 = time.perf_counter()
279        treadrows = round(t2 - t1, 3)
280        cpureadrows = round(cpu2 - cpu1, 3)
281        tpercent = int(round(cpureadrows / treadrows, 2) * 100)
282        print("Rows read:", rowsr, " Row size:", rowsz, "Buf size:", bufsz)
283        print("Time reading rows: %s s (real) %s s (cpu)  %s%%" %
284              (treadrows, cpureadrows, tpercent))
285        print("Read rows/sec: ", int(rowsr / float(treadrows)))
286        print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)))
287
288    # Show the dirt
289    if debug > 1:
290        dump_garbage()
291