"""Benchmark PyTables table creation, column indexing and range queries.

Run as a script; the usage text in the ``__main__`` section lists the
options.  The functions below read the option globals (``verbose``,
``userandom``, ``doqueryidx``, ``filters`` and ``nrows``) that the
``__main__`` section sets, so they are not meant to be imported and
called on their own.
"""
from __future__ import print_function
import os
from time import time
import random
import numpy as np
import tables

# in order to always generate the same random sequence
random.seed(19)
np.random.seed((19, 20))


def open_db(filename, remove=0):
    """Open ``filename`` in append mode and return the file handle.

    If ``remove`` is true and the file already exists, it is deleted
    first so that the returned handle refers to a fresh file.
    """
    if remove and os.path.exists(filename):
        os.remove(filename)
    con = tables.open_file(filename, 'a')
    return con


def create_db(filename, nrows):
    """Create ``filename`` with a 4-column table of ``nrows`` rows, then index it.

    ``col1``/``col2`` hold int32 values and ``col3``/``col4`` hold float64
    values.  The values are the row numbers, or (when the global
    ``userandom`` is set) the row numbers plus normally distributed noise.
    Insert timings are printed when the global ``verbose`` is set.
    """

    class Record(tables.IsDescription):
        col1 = tables.Int32Col()
        col2 = tables.Int32Col()
        col3 = tables.Float64Col()
        col4 = tables.Float64Col()

    con = open_db(filename, remove=1)
    try:
        table = con.create_table(con.root, 'table', Record,
                                 filters=filters, expectedrows=nrows)
        # NOTE(review): kept from the original; presumably meant to select
        # the filters used for the indexes -- confirm it still has an effect
        # with the PyTables version in use.
        table.indexFilters = filters
        step = 1000 * 100
        scale = 0.1
        t1 = time()
        # Fill the table in chunks of ``step`` rows to bound memory usage.
        for i in range(0, nrows, step):
            stop = min(i + step, nrows)
            arr_f8 = np.arange(i, stop, dtype=np.float64)
            arr_i4 = np.arange(i, stop, dtype=np.int32)
            if userandom:
                arr_f8 += np.random.normal(0, stop * scale, size=stop - i)
                arr_i4 = np.array(arr_f8, dtype=np.int32)
            recarr = np.rec.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8],
                                       names='col1,col2,col3,col4')
            table.append(recarr)
        table.flush()
        ctime = time() - t1
        if verbose:
            print("insert time:", round(ctime, 5))
            print("Krows/s:", round((nrows / 1000.) / ctime, 5))
        index_db(table)
    finally:
        # Close the file even if creation or indexing fails.
        close_db(con)


def index_db(table):
    """Create indexes on ``col2`` (int) and ``col4`` (float) of ``table``.

    Index timings are printed when the global ``verbose`` is set; the rate
    is computed from the global ``nrows``.
    """
    t1 = time()
    table.cols.col2.create_index()
    itime = time() - t1
    if verbose:
        print("index time (int):", round(itime, 5))
        print("Krows/s:", round((nrows / 1000.) / itime, 5))
    t1 = time()
    table.cols.col4.create_index()
    itime = time() - t1
    if verbose:
        print("index time (float):", round(itime, 5))
        print("Krows/s:", round((nrows / 1000.) / itime, 5))


def _timed_query(table, where_col, read_col, rng, label, ntimes=10):
    """Time ``ntimes`` range queries on ``where_col`` and report the mean.

    On iteration ``i`` the rows with ``rng[0] + i <= where_col <= rng[1] + i``
    are selected and their ``read_col`` values collected.  When the global
    ``verbose`` is set, the mean time per query, a throughput figure and the
    values from the last iteration are printed.
    """
    condition = "(lo <= %s) & (%s <= hi)" % (where_col, where_col)
    results = []
    t1 = time()
    for i in range(ntimes):
        bounds = {'lo': rng[0] + i, 'hi': rng[1] + i}
        results = [r[read_col] for r in table.where(condition, bounds)]
    qtime = (time() - t1) / ntimes
    if verbose:
        print("query time (%s):" % label, round(qtime, 5))
        # NOTE(review): the label says Mrows/s but the value is computed in
        # thousands of rows per second -- confirm which one is intended.
        print("Mrows/s:", round((nrows / 1000.) / qtime, 5))
        print(results)


def query_db(filename, rng):
    """Run the timed range queries against the table stored in ``filename``.

    ``rng`` is a ``[start, stop]`` pair giving the base range of the
    queries.  The non-indexed columns (``col1``, ``col3``) are queried only
    when the global ``doqueryidx`` is false; the indexed columns (``col2``,
    ``col4``) are always queried.
    """
    con = open_db(filename)
    try:
        table = con.root.table
        # Query for integer columns
        if not doqueryidx:
            _timed_query(table, 'col1', 'col1', rng, "int, not indexed")
        _timed_query(table, 'col2', 'col1', rng, "int, indexed")
        # Query for floating columns
        if not doqueryidx:
            _timed_query(table, 'col3', 'col3', rng, "float, not indexed")
        _timed_query(table, 'col4', 'col3', rng, "float, indexed")
    finally:
        # Close the file even if a query fails.
        close_db(con)


def close_db(con):
    """Close the file handle returned by ``open_db``."""
    con.close()

if __name__ == "__main__":
    import sys
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except ImportError:
        psyco_imported = 0

    usage = """usage: %s [-v] [-p] [-m] [-c] [-q] [-i] [-z complevel] [-l complib] [-R range] [-n nrows] file
            -v verbose
            -p use "psyco" if available
            -m use random values to fill the table
            -q do a query (both indexed and non-indexed version)
            -i do a query (exclude non-indexed version)
            -c create the database
            -z compress with zlib (no compression by default)
            -l use complib for compression (zlib used by default)
            -R select a range in a field in the form "start,stop" (def "0,10")
            -n sets the number of rows (in krows) in each table
            \n""" % sys.argv[0]

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'vpmcqiz:l:R:n:')
    except getopt.GetoptError:
        sys.stderr.write(usage)
        sys.exit(0)

    # The hdf5 file is a mandatory positional argument
    if not pargs:
        sys.stderr.write(usage)
        sys.exit(1)

    # default options
    verbose = 0
    usepsyco = 0
    userandom = 0
    docreate = 0
    docompress = 0
    complib = "zlib"
    doquery = 0
    doqueryidx = 0
    rng = [0, 10]
    nrows = 1

    # Get the options
    for option in opts:
        if option[0] == '-v':
            verbose = 1
        elif option[0] == '-p':
            usepsyco = 1
        elif option[0] == '-m':
            userandom = 1
        elif option[0] == '-c':
            docreate = 1
            createindex = 1
        elif option[0] == '-q':
            doquery = 1
        elif option[0] == '-i':
            doqueryidx = 1
        elif option[0] == '-z':
            docompress = int(option[1])
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-R':
            rng = [int(i) for i in option[1].split(",")]
        elif option[0] == '-n':
            nrows = int(option[1])

    # Catch the hdf5 file passed as the last argument
    filename = pargs[0]

    # The filters chosen
    filters = tables.Filters(complevel=docompress, complib=complib)

    if verbose:
        print("pytables version:", tables.__version__)
        if userandom:
            print("using random values")
        if doqueryidx:
            print("doing indexed queries only")

    if docreate:
        if verbose:
            print("writing %s krows" % nrows)
        if psyco_imported and usepsyco:
            psyco.bind(create_db)
        # -n is given in thousands of rows
        nrows *= 1000
        create_db(filename, nrows)

    if doquery:
        query_db(filename, rng)