1from __future__ import print_function
2import os
3from time import time
4import random
5import numpy as np
6import tables
7
# Seed both the standard-library and the NumPy random generators with fixed
# values so that every run produces the same random sequence.
random.seed(19)
np.random.seed((19, 20))
11
12
def open_db(filename, remove=0):
    """Open ``filename`` in append mode and return the resulting handle.

    When ``remove`` is true and a file already exists at ``filename``, that
    file is deleted before opening.
    """
    if remove:
        if os.path.exists(filename):
            os.remove(filename)
    return tables.open_file(filename, 'a')
18
19
def create_db(filename, nrows):
    """Create a fresh benchmark file holding one table of ``nrows`` rows.

    Any existing file at ``filename`` is removed first.  The table has two
    int32 columns (``col1``, ``col2``) and two float64 columns (``col3``,
    ``col4``).  Rows are appended in chunks and ``index_db`` is then called
    on the table.

    Reads the module-level ``filters``, ``userandom`` and ``verbose``
    settings.

    Args:
        filename: Path of the HDF5 file to (re)create.
        nrows: Total number of rows to write.
    """

    class Record(tables.IsDescription):
        col1 = tables.Int32Col()
        col2 = tables.Int32Col()
        col3 = tables.Float64Col()
        col4 = tables.Float64Col()

    step = 1000 * 100  # rows appended per chunk
    scale = 0.1  # noise std-dev, as a fraction of the chunk's upper bound

    con = open_db(filename, remove=1)
    try:
        table = con.create_table(con.root, 'table', Record,
                                 filters=filters, expectedrows=nrows)
        # NOTE(review): legacy attribute name; confirm the installed PyTables
        # still honours it (otherwise pass ``filters`` to ``create_index``).
        table.indexFilters = filters
        t1 = time()
        for start in range(0, nrows, step):
            stop = min(start + step, nrows)
            arr_f8 = np.arange(start, stop, dtype=np.float64)
            if userandom:
                arr_f8 += np.random.normal(0, stop * scale,
                                           size=stop - start)
                # The integer columns mirror the (noisy) float values
                arr_i4 = arr_f8.astype(np.int32)
            else:
                arr_i4 = np.arange(start, stop, dtype=np.int32)
            recarr = np.rec.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8])
            table.append(recarr)
        table.flush()
        ctime = time() - t1
        if verbose:
            print("insert time:", round(ctime, 5))
            print("Krows/s:", round((nrows / 1000.) / ctime, 5))
        index_db(table)
    finally:
        # Always release the file handle, even if filling or indexing fails
        close_db(con)
55
56
def index_db(table):
    """Create indexes on ``col2`` and ``col4`` of ``table``, timing each one.

    When the module-level ``verbose`` flag is set, the elapsed time and the
    throughput in Krows/s are printed for each index.

    Args:
        table: Table whose ``cols.col2`` and ``cols.col4`` columns are indexed.
    """
    # Take the row count from the table itself rather than from a script
    # global, so the reported rate always matches the data being indexed.
    krows = int(table.nrows) / 1000.
    for label, column in (("int", table.cols.col2),
                          ("float", table.cols.col4)):
        t1 = time()
        column.create_index()
        itime = time() - t1
        if verbose:
            print("index time (%s):" % label, round(itime, 5))
            print("Krows/s:", round(krows / itime, 5))
70
71
def _time_range_query(table, where_col, result_col, rng, ntimes=10):
    # Run the range query ``ntimes`` times, shifting the range by one on each
    # pass, and return (mean seconds per query, results of the last pass).
    condition = "(lo <= {0}) & ({0} <= hi)".format(where_col)
    results = []
    t1 = time()
    for i in range(ntimes):
        bounds = {"lo": rng[0] + i, "hi": rng[1] + i}
        results = [r[result_col] for r in table.where(condition, bounds)]
    return (time() - t1) / ntimes, results


def query_db(filename, rng):
    """Time range queries against the table stored in ``filename``.

    Four queries are run, in this order: integer column without index
    (``col1``), integer column with index (``col2``), float column without
    index (``col3``) and float column with index (``col4``).  The two
    non-indexed queries are skipped when the module-level ``doqueryidx`` flag
    is set.  When the module-level ``verbose`` flag is set, the mean query
    time, the throughput in Mrows/s and the last result list are printed.

    Args:
        filename: Path of the file to open.
        rng: Two-item sequence ``[start, stop]`` with the inclusive bounds of
            the range to select.
    """
    # (label, column filtered on, column read back, filters on indexed column)
    queries = (
        ("int, not indexed", "col1", "col1", False),
        ("int, indexed", "col2", "col1", True),
        ("float, not indexed", "col3", "col3", False),
        ("float, indexed", "col4", "col3", True),
    )
    con = open_db(filename)
    try:
        table = con.root.table
        # Use the real row count of the table: the script-level ``nrows`` is
        # expressed in krows unless the file was created in the same run.
        mrows = int(table.nrows) / 1e6
        for label, where_col, result_col, indexed in queries:
            if doqueryidx and not indexed:
                continue
            qtime, results = _time_range_query(table, where_col,
                                               result_col, rng)
            if verbose:
                print("query time (%s):" % label, round(qtime, 5))
                print("Mrows/s:", round(mrows / qtime, 5))
                print(results)
    finally:
        # Always release the file handle, even if a query fails
        close_db(con)
130
131
def close_db(con):
    """Close ``con``, the handle previously returned by ``open_db``."""
    con.close()
134
# Command-line driver: parse the options into the module-level settings read
# by the functions above, then create and/or query the benchmark file.
if __name__ == "__main__":
    import sys
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except ImportError:
        # psyco is an optional accelerator; carry on without it when missing
        psyco_imported = 0

    usage = """usage: %s [-v] [-p] [-m] [-c] [-q] [-i] [-z complevel] [-l complib] [-R range] [-n nrows] file
            -v verbose
            -p use "psyco" if available
            -m use random values to fill the table
            -q do a query (both indexed and non-indexed version)
            -i do a query (exclude non-indexed version)
            -c create the database
            -z compress with zlib (no compression by default)
            -l use complib for compression (zlib used by default)
            -R select a range in a field in the form "start,stop" (def "0,10")
            -n sets the number of rows (in krows) in each table
            \n""" % sys.argv[0]

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'vpmcqiz:l:R:n:')
    except getopt.GetoptError:
        sys.stderr.write(usage)
        # A usage error must not report success to the caller
        sys.exit(2)

    # The data file is a mandatory positional argument
    if not pargs:
        sys.stderr.write(usage)
        sys.exit(2)

    # default options
    verbose = 0
    usepsyco = 0
    userandom = 0
    docreate = 0
    docompress = 0
    complib = "zlib"
    doquery = 0
    doqueryidx = 0
    rng = [0, 10]
    nrows = 1

    # Get the options
    for opt, value in opts:
        if opt == '-v':
            verbose = 1
        elif opt == '-p':
            usepsyco = 1
        elif opt == '-m':
            userandom = 1
        elif opt == '-c':
            docreate = 1
            createindex = 1
        elif opt == '-q':
            doquery = 1
        elif opt == '-i':
            doqueryidx = 1
        elif opt == '-z':
            docompress = int(value)
        elif opt == '-l':
            complib = value
        elif opt == '-R':
            rng = [int(i) for i in value.split(",")]
        elif opt == '-n':
            nrows = int(value)

    # The hdf5 file is the first positional argument
    filename = pargs[0]

    # The filters chosen
    filters = tables.Filters(complevel=docompress, complib=complib)

    if verbose:
        print("pytables version:", tables.__version__)
        if userandom:
            print("using random values")
        if doqueryidx:
            print("doing indexed queries only")

    if docreate:
        if verbose:
            print("writing %s krows" % nrows)
        if psyco_imported and usepsyco:
            psyco.bind(create_db)
        # -n is given in krows; the functions work with plain row counts
        nrows *= 1000
        create_db(filename, nrows)

    if doquery:
        query_db(filename, rng)
222