"""Crude gdbm benchmark: bulk insert, enumeration, lookups and overwrites.

Keys are the lines of /usr/share/dict/words plus several derived variants.
The database file lives under /ram when that directory exists, otherwise in
a freshly created temporary directory.  All timings are printed to stdout.
"""

from pprint import pprint
import os
import shutil
import tempfile

from time import time as now
import random

try:
    import gdbm
except ImportError:
    # Python 3 ships the same module under the name dbm.gnu.
    import dbm.gnu as gdbm

MAP_SIZE = 1048576 * 400

if os.path.exists('/ram'):
    DB_PATH = '/ram/testdb-gdbm'
else:
    # mkdtemp() atomically creates a private directory, avoiding the
    # race inherent in the deprecated tempfile.mktemp().
    DB_PATH = os.path.join(tempfile.mkdtemp(prefix='dirtybench-gdbm'),
                           'testdb-gdbm')

_WORDS_PATH = '/usr/share/dict/words'
_BATCH_SIZE = 50000        # records processed between progress checks
_PROGRESS_INTERVAL = 2     # minimum seconds between progress lines


def _rate(count, elapsed):
    """Return items per second, or 0 when no measurable time has elapsed."""
    if elapsed <= 0:
        return 0
    return count / elapsed


def _load_words(path):
    """Return a list of the lines in *path* plus their derived variants."""
    with open(path) as fp:
        words = set(fp.readlines())
    # Each step builds a full list first: the set must not be mutated
    # while it is being iterated.
    words.update([w.upper() for w in words])
    words.update([w[::-1] for w in words])
    words.update([w[::-1].upper() for w in words])
    words.update(['-'.join(w) for w in words])
    # Optional further expansions of the key set:
    #words.update(['+'.join(w) for w in words])
    #words.update(['/'.join(w) for w in words])
    return list(words)


def _run_batched(words, process_batch):
    """Pass *words* to *process_batch* in chunks, printing progress.

    A progress line is printed after a chunk when more than
    _PROGRESS_INTERVAL seconds have passed since the previous one; the
    rate shown is based on the number of words processed so far.

    Returns the elapsed wall-clock seconds for the whole pass.
    """
    t0 = last = now()
    done = 0
    for start in range(0, len(words), _BATCH_SIZE):
        batch = words[start:start + _BATCH_SIZE]
        process_batch(batch)
        done += len(batch)
        t1 = now()
        if (t1 - last) > _PROGRESS_INTERVAL:
            print('%.2fs (%d/sec)' % (t1 - t0, _rate(done, t1 - t0)))
            last = t1
    return now() - t0


def _count_pairs(env, keys):
    """Fetch the value of every key in *keys* and return how many there were."""
    count = 0
    for key in keys:
        env[key]
        count += 1
    return count


def _time_lookups(env, words, with_hash):
    """Look up every word in *env*, optionally hashing each value.

    Returns the elapsed wall-clock seconds.
    """
    t0 = now()
    if with_hash:
        for word in words:
            hash(env[word])
    else:
        for word in words:
            env[word]
    return now() - t0


def _benchmark(env, words, big):
    """Run every benchmark phase against the open database *env*.

    *big* is stored as the value of each key during the insert phase; when
    it is empty the key itself is stored instead.
    """
    def insert(batch):
        for word in batch:
            env[word] = big or word

    elapsed = _run_batched(words, insert)
    print('done all %d in %.2fs (%d/sec)'
          % (len(words), elapsed, _rate(len(words), elapsed)))

    print('')
    print('')

    t0 = now()
    lst = _count_pairs(env, env.keys())
    t1 = now()
    print('enum %d (key, value) pairs took %.2f sec' % ((lst), t1 - t0))

    t0 = now()
    lst = _count_pairs(env, reversed(env.keys()))
    t1 = now()
    print('reverse enum %d (key, value) pairs took %.2f sec'
          % ((lst), t1 - t0))

    # `words` came out of a set, so iterating it visits keys in an
    # arbitrary order.
    lookup_phases = (
        ('rand lookup all keys %.2f sec (%d/sec)', False),
        ('per txn rand lookup+hash all keys %.2f sec (%d/sec)', True),
        ('rand lookup+hash all keys %.2f sec (%d/sec)', True),
        ('rand lookup all buffers %.2f sec (%d/sec)', False),
        ('rand lookup+hash all buffers %.2f sec (%d/sec)', True),
    )
    for template, with_hash in lookup_phases:
        elapsed = _time_lookups(env, words, with_hash)
        print(template % (elapsed, _rate(lst, elapsed)))

    # Sorting happens outside the timed sections.
    ordered = sorted(words)

    #
    # get+put
    #

    def get_put(batch):
        for word in batch:
            env[word]
            env[word] = word

    elapsed = _run_batched(ordered, get_put)
    print('get+put all %d in %.2fs (%d/sec)'
          % (len(words), elapsed, _rate(len(words), elapsed)))

    #
    # REPLACE
    #

    def replace(batch):
        # Assignment over an existing key overwrites its stored value.
        for word in batch:
            env[word] = word

    elapsed = _run_batched(ordered, replace)
    print('replace all %d in %.2fs (%d/sec)'
          % (len(words), elapsed, _rate(len(words), elapsed)))


def x():
    """Run the full benchmark against a fresh database at DB_PATH.

    Any existing file at DB_PATH is removed first.  The database handle is
    always closed, even when a phase raises.
    """
    big = ''  # '*' * 400

    if os.path.exists(DB_PATH):
        os.unlink(DB_PATH)

    t0 = now()
    words = _load_words(_WORDS_PATH)
    alllen = sum(len(w) for w in words)
    avglen = alllen // len(words) if words else 0
    print('permutate %d words avglen %d took %.2fsec'
          % (len(words), avglen, now() - t0))

    env = gdbm.open(DB_PATH, 'c')
    try:
        _benchmark(env, words, big)
    finally:
        env.close()


# Run the benchmark whenever this file is executed.
x()