1 /* MDB Tools - A library for reading MS Access database file
2 * Copyright (C) 2000-2004 Brian Bruns
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 #include "mdbtools.h"
21
22 #ifdef DMALLOC
23 #include "dmalloc.h"
24 #endif
25
26 MdbIndexPage *mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain);
27 MdbIndexPage *mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg);
28
29 char idx_to_text[] = {
30 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0-7 0x00-0x07 */
31 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8-15 0x09-0x0f */
32 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16-23 0x10-0x17 */
33 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24-31 0x19-0x1f */
34 ' ', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 32-39 0x20-0x27 */
35 0x00, 0x00, 0x00, 0x00, 0x00, ' ', ' ', 0x00, /* 40-47 0x29-0x2f */
36 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', /* 48-55 0x30-0x37 */
37 '^', '_', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56-63 0x39-0x3f */
38 0x00, '`', 'a', 'b', 'd', 'f', 'g', 'h', /* 64-71 0x40-0x47 */
39 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'r', /* 72-79 0x49-0x4f H */
40 's', 't', 'u', 'v', 'w', 'x', 'z', '{', /* 80-87 0x50-0x57 P */
41 '|', '}', '~', '5', '6', '7', '8', '9', /* 88-95 0x59-0x5f */
42 0x00, '`', 'a', 'b', 'd', 'f', 'g', 'h', /* 96-103 0x60-0x67 */
43 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'r', /* 014-111 0x69-0x6f h */
44 's', 't', 'u', 'v', 'w', 'x', 'z', '{', /* 112-119 0x70-0x77 p */
45 '|', '}', '~', 0x00, 0x00, 0x00, 0x00, 0x00, /* 120-127 0x78-0x7f */
46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 0x80-0x87 */
47 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
48 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
50 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */
52 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
53 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */
54 0x00, 0x00, 0x00, 0x00, 0x00, '`', 0x00, 0x00, /* 0xc0-0xc7 */
55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
56 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
57 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
58 0x00, '`', 0x00, '`', '`', '`', 0x00, 0x00, /* 0xe0-0xe7 */
59 'f', 'f', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
60 0x00, 0x00, 0x00, 'r', 0x00, 0x00, 'r', 0x00, /* 0xf0-0xf7 */
61 0x81, 0x00, 0x00, 0x00, 'x', 0x00, 0x00, 0x00, /* 0xf8-0xff */
62 };
63
64
65 GPtrArray *
mdb_read_indices(MdbTableDef * table)66 mdb_read_indices(MdbTableDef *table)
67 {
68 MdbCatalogEntry *entry = table->entry;
69 MdbHandle *mdb = entry->mdb;
70 MdbFormatConstants *fmt = mdb->fmt;
71 MdbIndex *pidx;
72 unsigned int i, j;
73 int idx_num, key_num, col_num;
74 int cur_pos, name_sz, idx2_sz, type_offset;
75 int index_start_pg = mdb->cur_pg;
76 gchar *tmpbuf;
77
78 table->indices = g_ptr_array_new();
79
80 if (IS_JET4(mdb)) {
81 cur_pos = table->index_start + 52 * table->num_real_idxs;
82 idx2_sz = 28;
83 type_offset = 23;
84 } else {
85 cur_pos = table->index_start + 39 * table->num_real_idxs;
86 idx2_sz = 20;
87 type_offset = 19;
88 }
89
90 tmpbuf = (gchar *) g_malloc(idx2_sz);
91 for (i=0;i<table->num_idxs;i++) {
92 read_pg_if_n(mdb, tmpbuf, &cur_pos, idx2_sz);
93 cur_pos += idx2_sz;
94 pidx = (MdbIndex *) g_malloc0(sizeof(MdbIndex));
95 pidx->table = table;
96 pidx->index_num = mdb_get_int16(tmpbuf, 4);
97 pidx->index_type = tmpbuf[type_offset];
98 g_ptr_array_add(table->indices, pidx);
99 }
100 g_free(tmpbuf);
101
102 for (i=0;i<table->num_idxs;i++) {
103 pidx = g_ptr_array_index (table->indices, i);
104 if (IS_JET4(mdb)) {
105 name_sz=read_pg_if_16(mdb, &cur_pos);
106 cur_pos += 2;
107 } else {
108 read_pg_if(mdb, &cur_pos, 0);
109 name_sz=mdb->pg_buf[cur_pos++];
110 }
111 tmpbuf = g_malloc(name_sz);
112 read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz);
113 cur_pos += name_sz;
114 mdb_unicode2ascii(mdb, tmpbuf, name_sz, pidx->name, MDB_MAX_OBJ_NAME);
115 g_free(tmpbuf);
116 //fprintf(stderr, "index name %s\n", pidx->name);
117 }
118
119 mdb_read_alt_pg(mdb, entry->table_pg);
120 mdb_read_pg(mdb, index_start_pg);
121 cur_pos = table->index_start;
122 idx_num=0;
123 for (i=0;i<table->num_real_idxs;i++) {
124 if (IS_JET4(mdb)) cur_pos += 4;
125 do {
126 pidx = g_ptr_array_index (table->indices, idx_num++);
127 } while (pidx && pidx->index_type==2);
128
129 /* if there are more real indexes than index entries left after
130 removing type 2's decrement real indexes and continue. Happens
131 on Northwind Orders table.
132 */
133 if (!pidx) {
134 table->num_real_idxs--;
135 continue;
136 }
137
138 pidx->num_rows = mdb_get_int32(mdb->alt_pg_buf,
139 fmt->tab_cols_start_offset +
140 (i*fmt->tab_ridx_entry_size));
141
142 key_num=0;
143 for (j=0;j<MDB_MAX_IDX_COLS;j++) {
144 col_num=read_pg_if_16(mdb,&cur_pos);
145 cur_pos += 2;
146 read_pg_if(mdb, &cur_pos, 0);
147 cur_pos++;
148 if (col_num == 0xFFFF)
149 continue;
150 /* set column number to a 1 based column number and store */
151 pidx->key_col_num[key_num] = col_num + 1;
152 pidx->key_col_order[key_num] =
153 (mdb->pg_buf[cur_pos-1]) ? MDB_ASC : MDB_DESC;
154 key_num++;
155 }
156 pidx->num_keys = key_num;
157
158 cur_pos += 4;
159 pidx->first_pg = read_pg_if_32(mdb, &cur_pos);
160 cur_pos += 4;
161 read_pg_if(mdb, &cur_pos, 0);
162 pidx->flags = mdb->pg_buf[cur_pos++];
163 if (IS_JET4(mdb)) cur_pos += 9;
164 }
165 return NULL;
166 }
167 void
mdb_index_hash_text(guchar * text,guchar * hash)168 mdb_index_hash_text(guchar *text, guchar *hash)
169 {
170 unsigned int k;
171
172 for (k=0;k<strlen(text);k++) {
173 hash[k] = idx_to_text[text[k]];
174 if (!(hash[k])) fprintf(stderr,
175 "No translation available for %02x %d\n",
176 text[k],text[k]);
177 }
178 hash[strlen(text)]=0;
179 }
180 /*
181 * reverse the order of the column for hashing
182 */
183 void
mdb_index_swap_n(unsigned char * src,int sz,unsigned char * dest)184 mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest)
185 {
186 int i, j = 0;
187
188 for (i = sz-1; i >= 0; i--) {
189 dest[j++] = src[i];
190 }
191 }
192 void
mdb_index_cache_sarg(MdbColumn * col,MdbSarg * sarg,MdbSarg * idx_sarg)193 mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg)
194 {
195 //guint32 cache_int;
196 unsigned char *c;
197
198 switch (col->col_type) {
199 case MDB_TEXT:
200 mdb_index_hash_text(sarg->value.s, idx_sarg->value.s);
201 break;
202
203 case MDB_LONGINT:
204 idx_sarg->value.i = GUINT32_SWAP_LE_BE(sarg->value.i);
205 //cache_int = sarg->value.i * -1;
206 c = (unsigned char *) &(idx_sarg->value.i);
207 c[0] |= 0x80;
208 //printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]);
209 break;
210
211 case MDB_INT:
212 break;
213
214 default:
215 break;
216 }
217 }
218 #if 0
219 int
220 mdb_index_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSarg *sarg, int offset, int len)
221 {
222 char tmpbuf[256];
223 int lastchar;
224
225 switch (col->col_type) {
226 case MDB_BYTE:
227 return mdb_test_int(sarg, mdb_pg_get_byte(mdb, offset));
228 break;
229 case MDB_INT:
230 return mdb_test_int(sarg, mdb_pg_get_int16(mdb, offset));
231 break;
232 case MDB_LONGINT:
233 return mdb_test_int(sarg, mdb_pg_get_int32(mdb, offset));
234 break;
235 case MDB_TEXT:
236 strncpy(tmpbuf, &mdb->pg_buf[offset],255);
237 lastchar = len > 255 ? 255 : len;
238 tmpbuf[lastchar]='\0';
239 return mdb_test_string(sarg, tmpbuf);
240 default:
241 fprintf(stderr, "Calling mdb_test_sarg on unknown type. Add code to mdb_test_sarg() for type %d\n",col->col_type);
242 break;
243 }
244 return 1;
245 }
246 #endif
247 int
mdb_index_test_sargs(MdbHandle * mdb,MdbIndex * idx,unsigned char * buf,int len)248 mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, unsigned char *buf, int len)
249 {
250 unsigned int i, j;
251 MdbColumn *col;
252 MdbTableDef *table = idx->table;
253 MdbSarg *idx_sarg;
254 MdbSarg *sarg;
255 MdbField field;
256 MdbSargNode node;
257 //int c_offset = 0,
258 int c_len;
259
260 //fprintf(stderr,"mdb_index_test_sargs called on ");
261 //for (i=0;i<len;i++)
262 //fprintf(stderr,"%02x ",buf[i]); //mdb->pg_buf[offset+i]);
263 //fprintf(stderr,"\n");
264 for (i=0;i<idx->num_keys;i++) {
265 //c_offset++; /* the per column null indicator/flags */
266 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
267 /*
268 * This will go away eventually
269 */
270 if (col->col_type==MDB_TEXT) {
271 //c_len = strlen(&mdb->pg_buf[offset + c_offset]);
272 c_len = strlen(buf);
273 } else {
274 c_len = col->col_size;
275 //fprintf(stderr,"Only text types currently supported. How did we get here?\n");
276 }
277 /*
278 * If we have no cached index values for this column,
279 * create them.
280 */
281 if (col->num_sargs && !col->idx_sarg_cache) {
282 col->idx_sarg_cache = g_ptr_array_new();
283 for (j=0;j<col->num_sargs;j++) {
284 sarg = g_ptr_array_index (col->sargs, j);
285 idx_sarg = g_memdup(sarg,sizeof(MdbSarg));
286 //printf("calling mdb_index_cache_sarg\n");
287 mdb_index_cache_sarg(col, sarg, idx_sarg);
288 g_ptr_array_add(col->idx_sarg_cache, idx_sarg);
289 }
290 }
291
292 for (j=0;j<col->num_sargs;j++) {
293 sarg = g_ptr_array_index (col->idx_sarg_cache, j);
294 /* XXX - kludge */
295 node.op = sarg->op;
296 node.value = sarg->value;
297 //field.value = &mdb->pg_buf[offset + c_offset];
298 field.value = buf;
299 field.siz = c_len;
300 field.is_null = FALSE;
301 if (!mdb_test_sarg(mdb, col, &node, &field)) {
302 /* sarg didn't match, no sense going on */
303 return 0;
304 }
305 }
306 }
307 return 1;
308 }
309 /*
310 * pack the pages bitmap
311 */
312 int
mdb_index_pack_bitmap(MdbHandle * mdb,MdbIndexPage * ipg)313 mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
314 {
315 int mask_bit = 0;
316 int mask_pos = 0x16;
317 int mask_byte = 0;
318 int elem = 0;
319 int len, start, i;
320
321 start = ipg->idx_starts[elem++];
322
323 while (start) {
324 //fprintf(stdout, "elem %d is %d\n", elem, ipg->idx_starts[elem]);
325 len = ipg->idx_starts[elem] - start;
326 //fprintf(stdout, "len is %d\n", len);
327 for (i=0; i < len; i++) {
328 mask_bit++;
329 if (mask_bit==8) {
330 mask_bit=0;
331 mdb->pg_buf[mask_pos++] = mask_byte;
332 mask_byte = 0;
333 }
334 /* upon reaching the len, set the bit */
335 }
336 mask_byte = (1 << mask_bit) | mask_byte;
337 //fprintf(stdout, "mask byte is %02x at %d\n", mask_byte, mask_pos);
338 start = ipg->idx_starts[elem++];
339 }
340 /* flush the last byte if any */
341 mdb->pg_buf[mask_pos++] = mask_byte;
342 /* remember to zero the rest of the bitmap */
343 for (i = mask_pos; i < 0xf8; i++) {
344 mdb->pg_buf[mask_pos++] = 0;
345 }
346 return 0;
347 }
348 /*
349 * unpack the pages bitmap
350 */
351 int
mdb_index_unpack_bitmap(MdbHandle * mdb,MdbIndexPage * ipg)352 mdb_index_unpack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
353 {
354 int mask_bit = 0;
355 int mask_pos = 0x16;
356 int mask_byte;
357 int start = 0xf8;
358 int elem = 0;
359 int len = 0;
360
361 ipg->idx_starts[elem++]=start;
362
363 //fprintf(stdout, "Unpacking index page %lu\n", ipg->pg);
364 do {
365 len = 0;
366 do {
367 mask_bit++;
368 if (mask_bit==8) {
369 mask_bit=0;
370 mask_pos++;
371 }
372 mask_byte = mdb->pg_buf[mask_pos];
373 len++;
374 } while (mask_pos <= 0xf8 && !((1 << mask_bit) & mask_byte));
375 //fprintf(stdout, "%d %d %d %d\n", mask_pos, mask_bit, mask_byte, len);
376
377 start += len;
378 if (mask_pos < 0xf8) ipg->idx_starts[elem++]=start;
379
380 } while (mask_pos < 0xf8);
381
382 /* if we zero the next element, so we don't pick up the last pages starts*/
383 ipg->idx_starts[elem]=0;
384
385 return elem;
386 }
387 /*
388 * find the next entry on a page (either index or leaf). Uses state information
389 * stored in the MdbIndexPage across calls.
390 */
391 int
mdb_index_find_next_on_page(MdbHandle * mdb,MdbIndexPage * ipg)392 mdb_index_find_next_on_page(MdbHandle *mdb, MdbIndexPage *ipg)
393 {
394 if (!ipg->pg) return 0;
395
396 /* if this page has not been unpacked to it */
397 if (!ipg->idx_starts[0]){
398 //fprintf(stdout, "Unpacking page %d\n", ipg->pg);
399 mdb_index_unpack_bitmap(mdb, ipg);
400 }
401
402
403 if (ipg->idx_starts[ipg->start_pos + 1]==0) return 0;
404 ipg->len = ipg->idx_starts[ipg->start_pos+1] - ipg->idx_starts[ipg->start_pos];
405 ipg->start_pos++;
406 //fprintf(stdout, "Start pos %d\n", ipg->start_pos);
407
408 return ipg->len;
409 }
mdb_index_page_reset(MdbIndexPage * ipg)410 void mdb_index_page_reset(MdbIndexPage *ipg)
411 {
412 ipg->offset = 0xf8; /* start byte of the index entries */
413 ipg->start_pos=0;
414 ipg->len = 0;
415 ipg->idx_starts[0]=0;
416 }
mdb_index_page_init(MdbIndexPage * ipg)417 void mdb_index_page_init(MdbIndexPage *ipg)
418 {
419 memset(ipg, 0, sizeof(MdbIndexPage));
420 mdb_index_page_reset(ipg);
421 }
422 /*
423 * find the next leaf page if any given a chain. Assumes any exhausted leaf
424 * pages at the end of the chain have been peeled off before the call.
425 */
426 MdbIndexPage *
mdb_find_next_leaf(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)427 mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
428 {
429 MdbIndexPage *ipg, *newipg;
430 guint32 pg;
431 guint passed = 0;
432
433 ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
434
435 /*
436 * If we are at the first page deep and it's not an index page then
437 * we are simply done. (there is no page to find
438 */
439
440 if (mdb->pg_buf[0]==MDB_PAGE_LEAF) {
441 /* Indexes can have leaves at the end that don't appear
442 * in the upper tree, stash the last index found so
443 * we can follow it at the end. */
444 chain->last_leaf_found = ipg->pg;
445 return ipg;
446 }
447
448 /*
449 * apply sargs here, currently we don't
450 */
451 do {
452 ipg->len = 0;
453 //printf("finding next on pg %lu\n", ipg->pg);
454 if (!mdb_index_find_next_on_page(mdb, ipg)) {
455 //printf("find_next_on_page returned 0\n");
456 return 0;
457 }
458 pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 3);
459 //printf("Looking at pg %lu at %lu %d\n", pg, ipg->offset, ipg->len);
460 ipg->offset += ipg->len;
461
462 /*
463 * add to the chain and call this function
464 * recursively.
465 */
466 newipg = mdb_chain_add_page(mdb, chain, pg);
467 newipg = mdb_find_next_leaf(mdb, idx, chain);
468 //printf("returning pg %lu\n",newipg->pg);
469 return newipg;
470 } while (!passed);
471 /* no more pages */
472 return NULL;
473
474 }
475 MdbIndexPage *
mdb_chain_add_page(MdbHandle * mdb,MdbIndexChain * chain,guint32 pg)476 mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg)
477 {
478 MdbIndexPage *ipg;
479
480 chain->cur_depth++;
481 if (chain->cur_depth > MDB_MAX_INDEX_DEPTH) {
482 fprintf(stderr,"Error! maximum index depth of %d exceeded. This is probably due to a programming bug, If you are confident that your indexes really are this deep, adjust MDB_MAX_INDEX_DEPTH in mdbtools.h and recompile.\n", MDB_MAX_INDEX_DEPTH);
483 exit(1);
484 }
485 ipg = &(chain->pages[chain->cur_depth - 1]);
486 mdb_index_page_init(ipg);
487 ipg->pg = pg;
488
489 return ipg;
490 }
491 /*
492 * returns the bottom page of the IndexChain, if IndexChain is empty it
493 * initializes it by reading idx->first_pg (the root page)
494 */
495 MdbIndexPage *
mdb_index_read_bottom_pg(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)496 mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
497 {
498 MdbIndexPage *ipg;
499
500 /*
501 * if it's new use the root index page (idx->first_pg)
502 */
503 if (!chain->cur_depth) {
504 ipg = &(chain->pages[0]);
505 mdb_index_page_init(ipg);
506 chain->cur_depth = 1;
507 ipg->pg = idx->first_pg;
508 if (!(ipg = mdb_find_next_leaf(mdb, idx, chain)))
509 return 0;
510 } else {
511 ipg = &(chain->pages[chain->cur_depth - 1]);
512 ipg->len = 0;
513 }
514
515 mdb_read_pg(mdb, ipg->pg);
516
517 return ipg;
518 }
519 /*
520 * unwind the stack and search for new leaf node
521 */
522 MdbIndexPage *
mdb_index_unwind(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)523 mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
524 {
525 MdbIndexPage *ipg;
526
527 //printf("page %lu finished\n",ipg->pg);
528 if (chain->cur_depth==1) {
529 //printf("cur_depth == 1 we're out\n");
530 return NULL;
531 }
532 /*
533 * unwind the stack until we find something or reach
534 * the top.
535 */
536 ipg = NULL;
537 while (chain->cur_depth>1 && ipg==NULL) {
538 //printf("chain depth %d\n", chain->cur_depth);
539 chain->cur_depth--;
540 ipg = mdb_find_next_leaf(mdb, idx, chain);
541 if (ipg) mdb_index_find_next_on_page(mdb, ipg);
542 }
543 if (chain->cur_depth==1) {
544 //printf("last leaf %lu\n", chain->last_leaf_found);
545 return NULL;
546 }
547 return ipg;
548 }
549 /*
550 * the main index function.
551 * caller provides an index chain which is the current traversal of index
552 * pages from the root page to the leaf. Initially passed as blank,
553 * mdb_index_find_next will store it's state information here. Each invocation
554 * then picks up where the last one left off, allowing us to scroll through
555 * the index one by one.
556 *
557 * Sargs are applied here but also need to be applied on the whole row b/c
558 * text columns may return false positives due to hashing and non-index
559 * columns with sarg values can't be tested here.
560 */
561 int
mdb_index_find_next(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain,guint32 * pg,guint16 * row)562 mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 *pg, guint16 *row)
563 {
564 MdbIndexPage *ipg;
565 int passed = 0;
566 int idx_sz;
567 int idx_start = 0;
568 MdbColumn *col;
569
570 ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
571
572 /*
573 * loop while the sargs don't match
574 */
575 do {
576 ipg->len = 0;
577 /*
578 * if no more rows on this leaf, try to find a new leaf
579 */
580 if (!mdb_index_find_next_on_page(mdb, ipg)) {
581 if (!chain->clean_up_mode) {
582 if (!(ipg = mdb_index_unwind(mdb, idx, chain)))
583 chain->clean_up_mode = 1;
584 }
585 if (chain->clean_up_mode) {
586 //fprintf(stdout,"in cleanup mode\n");
587
588 if (!chain->last_leaf_found) return 0;
589 mdb_read_pg(mdb, chain->last_leaf_found);
590 chain->last_leaf_found = mdb_pg_get_int24(mdb, 0x0c);
591 //printf("next leaf %lu\n", chain->last_leaf_found);
592 mdb_read_pg(mdb, chain->last_leaf_found);
593 /* reuse the chain for cleanup mode */
594 chain->cur_depth = 1;
595 ipg = &chain->pages[0];
596 mdb_index_page_init(ipg);
597 ipg->pg = chain->last_leaf_found;
598 //printf("next on page %d\n",
599 if (!mdb_index_find_next_on_page(mdb, ipg))
600 return 0;
601 }
602 }
603 *row = mdb->pg_buf[ipg->offset + ipg->len - 1];
604 *pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4);
605 //printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len);
606 col=g_ptr_array_index(idx->table->columns,idx->key_col_num[0]-1);
607 idx_sz = mdb_col_fixed_size(col);
608 /* handle compressed indexes, single key indexes only? */
609 if (idx->num_keys==1 && idx_sz>0 && ipg->len - 4 < idx_sz) {
610 //printf("short index found\n");
611 //buffer_dump(ipg->cache_value, 0, idx_sz);
612 memcpy(&ipg->cache_value[idx_sz - (ipg->len - 4)], &mdb->pg_buf[ipg->offset], ipg->len);
613 //buffer_dump(ipg->cache_value, 0, idx_sz);
614 } else {
615 idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
616 memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz);
617 }
618
619 //idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
620 passed = mdb_index_test_sargs(mdb, idx, ipg->cache_value, idx_sz);
621
622 ipg->offset += ipg->len;
623 } while (!passed);
624
625 //fprintf(stdout,"len = %d pos %d\n", ipg->len, ipg->mask_pos);
626 //buffer_dump(mdb->pg_buf, ipg->offset, ipg->offset+ipg->len-1);
627
628 return ipg->len;
629 }
630 /*
631 * XXX - FIX ME
632 * This function is grossly inefficient. It scans the entire index building
633 * an IndexChain to a specific row. We should be checking the index pages
634 * for matches against the indexed fields to find the proper leaf page, but
635 * getting it working first and then make it fast!
636 */
637 int
mdb_index_find_row(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain,guint32 pg,guint16 row)638 mdb_index_find_row(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 pg, guint16 row)
639 {
640 MdbIndexPage *ipg;
641 int passed = 0;
642 guint32 datapg;
643 guint16 datarow;
644
645 ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
646
647 do {
648 ipg->len = 0;
649 /*
650 * if no more rows on this leaf, try to find a new leaf
651 */
652 if (!mdb_index_find_next_on_page(mdb, ipg)) {
653 /* back to top? We're done */
654 if (chain->cur_depth==1)
655 return 0;
656
657 /*
658 * unwind the stack until we find something or reach
659 * the top.
660 */
661 while (chain->cur_depth>1) {
662 chain->cur_depth--;
663 if (!(ipg = mdb_find_next_leaf(mdb, idx, chain)))
664 return 0;
665 mdb_index_find_next_on_page(mdb, ipg);
666 }
667 if (chain->cur_depth==1)
668 return 0;
669 }
670 /* test row and pg */
671 datarow = mdb->pg_buf[ipg->offset + ipg->len - 1];
672 datapg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4);
673
674 if (datapg == pg && datarow == row) {
675 passed = 1;
676 }
677 ipg->offset += ipg->len;
678 } while (!passed);
679
680 /* index chain from root to leaf should now be in "chain" */
681 return 1;
682 }
683
mdb_index_walk(MdbTableDef * table,MdbIndex * idx)684 void mdb_index_walk(MdbTableDef *table, MdbIndex *idx)
685 {
686 MdbHandle *mdb = table->entry->mdb;
687 int cur_pos = 0;
688 unsigned char marker;
689 MdbColumn *col;
690 unsigned int i;
691
692 if (idx->num_keys!=1) return;
693
694 mdb_read_pg(mdb, idx->first_pg);
695 cur_pos = 0xf8;
696
697 for (i=0;i<idx->num_keys;i++) {
698 marker = mdb->pg_buf[cur_pos++];
699 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
700 //printf("column %d coltype %d col_size %d (%d)\n",i,col->col_type, mdb_col_fixed_size(col), col->col_size);
701 }
702 }
703 void
mdb_index_dump(MdbTableDef * table,MdbIndex * idx)704 mdb_index_dump(MdbTableDef *table, MdbIndex *idx)
705 {
706 unsigned int i;
707 MdbColumn *col;
708
709 fprintf(stdout,"index number %d\n", idx->index_num);
710 fprintf(stdout,"index name %s\n", idx->name);
711 fprintf(stdout,"index first page %d\n", idx->first_pg);
712 fprintf(stdout,"index rows %d\n", idx->num_rows);
713 if (idx->index_type==1) fprintf(stdout,"index is a primary key\n");
714 for (i=0;i<idx->num_keys;i++) {
715 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
716 fprintf(stdout,"Column %s(%d) Sorted %s Unique: %s\n",
717 col->name,
718 idx->key_col_num[i],
719 idx->key_col_order[i]==MDB_ASC ? "ascending" : "descending",
720 idx->flags & MDB_IDX_UNIQUE ? "Yes" : "No"
721 );
722 }
723 mdb_index_walk(table, idx);
724 }
725 /*
726 * compute_cost tries to assign a cost to a given index using the sargs
727 * available in this query.
728 *
729 * Indexes with no matching sargs are assigned 0
730 * Unique indexes are preferred over non-uniques
731 * Operator preference is equal, like, isnull, others
732 */
mdb_index_compute_cost(MdbTableDef * table,MdbIndex * idx)733 int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
734 {
735 unsigned int i;
736 MdbColumn *col;
737 MdbSarg *sarg = NULL;
738 int not_all_equal = 0;
739
740 if (!idx->num_keys) return 0;
741 if (idx->num_keys > 1) {
742 for (i=0;i<idx->num_keys;i++) {
743 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
744 if (col->sargs) sarg = g_ptr_array_index (col->sargs, 0);
745 if (!sarg || sarg->op != MDB_EQUAL) not_all_equal++;
746 }
747 }
748
749 col=g_ptr_array_index(table->columns,idx->key_col_num[0]-1);
750 /*
751 * if this is the first key column and there are no sargs,
752 * then this index is useless.
753 */
754 if (!col->num_sargs) return 0;
755
756 sarg = g_ptr_array_index (col->sargs, 0);
757
758 /*
759 * a like with a wild card first is useless as a sarg */
760 if (sarg->op == MDB_LIKE && sarg->value.s[0]=='%')
761 return 0;
762
763 /*
764 * this needs a lot of tweaking.
765 */
766 if (idx->flags & MDB_IDX_UNIQUE) {
767 if (idx->num_keys == 1) {
768 //printf("op is %d\n", sarg->op);
769 switch (sarg->op) {
770 case MDB_EQUAL:
771 return 1; break;
772 case MDB_LIKE:
773 return 4; break;
774 case MDB_ISNULL:
775 return 12; break;
776 default:
777 return 8; break;
778 }
779 } else {
780 switch (sarg->op) {
781 case MDB_EQUAL:
782 if (not_all_equal) return 2;
783 else return 1;
784 break;
785 case MDB_LIKE:
786 return 6; break;
787 case MDB_ISNULL:
788 return 12; break;
789 default:
790 return 9; break;
791 }
792 }
793 } else {
794 if (idx->num_keys == 1) {
795 switch (sarg->op) {
796 case MDB_EQUAL:
797 return 2; break;
798 case MDB_LIKE:
799 return 5; break;
800 case MDB_ISNULL:
801 return 12; break;
802 default:
803 return 10; break;
804 }
805 } else {
806 switch (sarg->op) {
807 case MDB_EQUAL:
808 if (not_all_equal) return 3;
809 else return 2;
810 break;
811 case MDB_LIKE:
812 return 7; break;
813 case MDB_ISNULL:
814 return 12; break;
815 default:
816 return 11; break;
817 }
818 }
819 }
820 return 0;
821 }
822 /*
823 * choose_index runs mdb_index_compute_cost for each available index and picks
824 * the best.
825 *
826 * Returns strategy to use (table scan, or index scan)
827 */
828 MdbStrategy
mdb_choose_index(MdbTableDef * table,int * choice)829 mdb_choose_index(MdbTableDef *table, int *choice)
830 {
831 unsigned int i;
832 MdbIndex *idx;
833 int cost = 0;
834 int least = 99;
835
836 *choice = -1;
837 for (i=0;i<table->num_idxs;i++) {
838 idx = g_ptr_array_index (table->indices, i);
839 cost = mdb_index_compute_cost(table, idx);
840 //printf("cost for %s is %d\n", idx->name, cost);
841 if (cost && cost < least) {
842 least = cost;
843 *choice = i;
844 }
845 }
846 /* and the winner is: *choice */
847 if (least==99) return MDB_TABLE_SCAN;
848 return MDB_INDEX_SCAN;
849 }
850 void
mdb_index_scan_init(MdbHandle * mdb,MdbTableDef * table)851 mdb_index_scan_init(MdbHandle *mdb, MdbTableDef *table)
852 {
853 int i;
854
855 if (mdb_get_option(MDB_USE_INDEX) && mdb_choose_index(table, &i) == MDB_INDEX_SCAN) {
856 table->strategy = MDB_INDEX_SCAN;
857 table->scan_idx = g_ptr_array_index (table->indices, i);
858 table->chain = g_malloc0(sizeof(MdbIndexChain));
859 table->mdbidx = mdb_clone_handle(mdb);
860 mdb_read_pg(table->mdbidx, table->scan_idx->first_pg);
861 //printf("best index is %s\n",table->scan_idx->name);
862 }
863 //printf("TABLE SCAN? %d\n", table->strategy);
864 }
865 void
mdb_index_scan_free(MdbTableDef * table)866 mdb_index_scan_free(MdbTableDef *table)
867 {
868 if (table->chain) {
869 g_free(table->chain);
870 table->chain = NULL;
871 }
872 if (table->mdbidx) {
873 mdb_close(table->mdbidx);
874 table->mdbidx = NULL;
875 }
876 }
877
mdb_free_indices(GPtrArray * indices)878 void mdb_free_indices(GPtrArray *indices)
879 {
880 unsigned int i;
881
882 if (!indices) return;
883 for (i=0; i<indices->len; i++)
884 g_free (g_ptr_array_index(indices, i));
885 g_ptr_array_free(indices, TRUE);
886 }
887