1"""Implementation of fast search algorithms
2Used in select when one of the search fields has a fixed-length size
3
4Instead of taking all the blocks in field files one after the other, a big
5number of blocks are read and the search is made on the buffer
6"""
7
8from datetime import date, datetime
9import itertools
10
11# compatibility with Python 2.3
12try:
13    set([1])
14except NameError:
15    from sets import Set as set
16
17def rev(s):
18    """ function used to compare strings in decreasing order"""
19    return ''.join([chr(255-ord(c)) for c in s])
20
21def make_search_func(db,field,value):
22    """Return the search function on a field
23    If value is a pair of values (v1,v2), all blocks between v1 and v2
24    will be returned ; if value is a single value then all blocks with
25    this value will be returned
26    """
27    bl = db._file[field].block_len  # block length
28    if isinstance(value,(list,tuple)):
29        value = list(value)
30        if not len(value)==2:
31            raise ValueError,"If argument is a list, only 2 values \
32                should be passed (found %s)" %len(value)
33        if not db.fields[field] in [int,float,date,datetime]:
34            raise TypeError,"Search between values is only allowed for " \
35                "int, float, date and datetime (found %s)" %db.fields[field]
36        db._validate(field,value[0])
37        db._validate(field,value[1])
38        value.sort()
39        # convert values in blocks (strings representation in field files)
40        s1,s2 = [ db.f_encode[db.fields[field]](v) for v in value ]
41        # search the common leading characters in s1 and s2
42        common = ''
43        for i in range(len(s1)):
44            if s1[i] == s2[i]:
45                common += s1[i]
46            else:
47                break
48        lc = len(common)
49        Min = s1[lc:] # part of s1 not common with s2
50        Max = s2[lc:] # part of s2 not common with s1
51
52        def _search(buf):
53            """Function searching blocks in the buffer such that
54            s1 <= block <= s2
55            Return a dictionary mapping rank of the block to the block
56
57            The algorithm searches occurences of 'common', then checks
58            that the rest of the block is between Min and Max
59            """
60            ranks = {}
61            pos = 0
62            while True:
63                # search occurences of the common part between s1 and s2
64                pos = buf.find(common,pos)
65                if pos == -1:
66                    break
67                if pos % bl == 0:
68                    # pos is a block start
69                    block = buf[pos:pos+bl]
70                    # rest of the block
71                    rest = buf[pos+lc:pos+bl]
72                    # compare rest of block to Min and Max
73                    if Min <= rest <= Max:
74                        ranks[pos/bl] = block
75                pos += 1
76            return ranks
77
78    else:
79        v = db.f_encode[db.fields[field]](value)
80
81        def _search(buf):
82            """Function searching blocks in the buffer such that
83            block == v
84            Return a dictionary mapping rank of the block to the block
85
86            The algorithm searches occurences of the block v in the
87            buffer
88            """
89            ranks = {}
90            pos = 0
91            while True:
92                pos = buf.find(v,pos)
93                if pos>-1:
94                    if pos % bl == 0:
95                        # pos is a block start
96                        ranks[pos/bl] = buf[pos:pos+bl]
97                    pos += 1
98                else:
99                    break
100            return ranks
101
102    return _search
103
104def fast_select(db,names,**args):
105    """Handles requests like select(['name'],age=23,name='pierre') when
106    one of the arg keys is fixed length type ; uses a fast search algo
107    instead of browsing all the records
108
109    The search functions are defined for all fixed-length arguments and
110    used to select a subset of record rows in field files
111    """
112    # fixed and variable length fields
113    f_args = [ (k,v) for k,v in args.iteritems()
114        if hasattr(db._file[k],'block_len') ]
115    v_args = [ (k,v) for (k,v) in args.iteritems()
116        if not hasattr(db._file[k],'block_len') ]
117    arg_names = [ k for k,v in f_args + v_args ]
118    no_args = [ n for n in names if not n in arg_names ]
119    names = arg_names + no_args
120
121    [ db._file[k].seek(0) for k in names + args.keys() ]
122    max_len = max([ db._file[k[0]].block_len for k in f_args ])
123    num_blocks = db.BLOCKSIZE / max_len
124    funcs = dict([(k,make_search_func(db,k,v))
125                    for (k,v) in f_args])
126    fl_ranks = [] # absolute ranks in fixed length files
127    bl_offset = 0 # offset of current chunck
128    res = {}
129    while True:
130        buf = {}
131        ranks = {}
132        # read a chunk of num_blocks blocks in each fixed-length file
133        for i,(k,v) in enumerate(f_args):
134            # rank[field] stores the rank of found values in
135            # the buffer, between 0 and num_blocks-1
136            bl = db._file[k].block_len
137            buf[k] = db._file[k].read(num_blocks*bl)
138            ranks[k] = funcs[k](buf[k])
139        # test end of files
140        if not buf[f_args[0][0]]:
141            break
142        # valid records are those with the same rank in all files
143        rank_set=set(ranks[f_args[0][0]].keys())
144        if len(f_args)>1:
145            for (k,v) in f_args[1:]:
146                rank_set = rank_set.intersection(set(ranks[k].keys()))
147        for c in rank_set:
148            res[bl_offset+c] = [ ranks[k][c] for k,v in f_args ]
149        bl_offset += num_blocks
150
151    fl_ranks = res.keys()
152    fl_ranks.sort()
153
154    # The field files for the other arguments are browsed ; if their
155    # row is in the subset, test if the value for variable length arguments
156    # is equal to the keyword value
157    vl_files = [ db._file[k] for k,v in v_args ]
158    nbvl = len(vl_files)
159    vl_values = tuple([ db._file[k].to_block(v) for (k,v) in v_args ])
160    no_args_files = [ db._file[k] for k in no_args ]
161    other_files = vl_files + no_args_files
162    for f in other_files:
163        f.seek(0)
164
165    for i,lines in enumerate(itertools.izip(*other_files)):
166        try:
167            if i == fl_ranks[0]:
168                fl_ranks.pop(0)
169                if lines[:nbvl] == vl_values:
170                    res[i]+=list(lines)
171                else:
172                    del res[i]
173        except IndexError:
174            break
175    return res,names
176