1 /* MDB Tools - A library for reading MS Access database file 2 * Copyright (C) 2000-2004 Brian Bruns 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Library General Public 6 * License as published by the Free Software Foundation; either 7 * version 2 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Library General Public License for more details. 13 * 14 * You should have received a copy of the GNU Library General Public 15 * License along with this library; if not, write to the 16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 * Boston, MA 02110-1301, USA. 18 */ 19 20 #include "mdbtools.h" main(int,char **)21 22 #ifdef DMALLOC 23 #include "dmalloc.h" 24 #endif 25 26 MdbIndexPage *mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain); 27 MdbIndexPage *mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg); 28 29 char idx_to_text[] = { 30 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0-7 0x00-0x07 */ 31 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8-15 0x09-0x0f */ 32 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16-23 0x10-0x17 */ 33 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24-31 0x19-0x1f */ 34 ' ', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 32-39 0x20-0x27 */ 35 0x00, 0x00, 0x00, 0x00, 0x00, ' ', ' ', 0x00, /* 40-47 0x29-0x2f */ 36 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', /* 48-55 0x30-0x37 */ 37 '^', '_', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56-63 0x39-0x3f */ 38 0x00, '`', 'a', 'b', 'd', 'f', 'g', 'h', /* 64-71 0x40-0x47 */ 39 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'r', /* 72-79 0x49-0x4f H */ 40 's', 't', 'u', 'v', 'w', 'x', 'z', '{', /* 80-87 0x50-0x57 P */ 41 '|', '}', '~', '5', '6', '7', '8', '9', /* 88-95 0x59-0x5f */ 42 0x00, '`', 'a', 'b', 'd', 'f', 'g', 'h', /* 96-103 0x60-0x67 */ 43 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'r', /* 014-111 0x69-0x6f h */ 44 's', 't', 'u', 'v', 'w', 'x', 'z', '{', /* 112-119 0x70-0x77 p */ 45 '|', '}', '~', 0x00, 0x00, 0x00, 0x00, 0x00, /* 120-127 0x78-0x7f */ 46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 0x80-0x87 */ 47 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 48 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 50 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 52 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 53 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 54 0x00, 0x00, 0x00, 0x00, 0x00, '`', 0x00, 0x00, /* 0xc0-0xc7 */ 55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 56 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 57 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 58 0x00, '`', 0x00, '`', '`', '`', 0x00, 0x00, /* 0xe0-0xe7 */ 59 'f', 'f', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 60 0x00, 0x00, 0x00, 'r', 0x00, 0x00, 'r', 0x00, /* 0xf0-0xf7 */ 61 0x81, 0x00, 0x00, 0x00, 'x', 0x00, 0x00, 0x00, /* 0xf8-0xff */ 62 }; 63 64 65 GPtrArray * 66 mdb_read_indices(MdbTableDef *table) 67 { 68 MdbCatalogEntry *entry = table->entry; 69 MdbHandle *mdb = entry->mdb; 70 MdbFormatConstants *fmt = mdb->fmt; 71 MdbIndex *pidx; 72 unsigned int i, j; 73 int idx_num, key_num, col_num; 74 int cur_pos, name_sz, idx2_sz, type_offset; 75 int index_start_pg = mdb->cur_pg; 76 gchar *tmpbuf; 77 78 table->indices = g_ptr_array_new(); 79 80 if (IS_JET4(mdb)) { 81 cur_pos = table->index_start + 52 * table->num_real_idxs; 82 idx2_sz = 28; 83 type_offset = 23; 84 } else { 85 cur_pos = table->index_start + 39 * table->num_real_idxs; 86 idx2_sz = 20; 87 type_offset = 19; 88 } 89 90 tmpbuf = (gchar *) g_malloc(idx2_sz); 91 for (i=0;i<table->num_idxs;i++) { 92 read_pg_if_n(mdb, tmpbuf, &cur_pos, idx2_sz); 93 cur_pos += idx2_sz; 94 pidx = (MdbIndex *) g_malloc0(sizeof(MdbIndex)); 95 pidx->table = table; 96 pidx->index_num = mdb_get_int16(tmpbuf, 4); 97 pidx->index_type = tmpbuf[type_offset]; 98 g_ptr_array_add(table->indices, pidx); 99 } 100 g_free(tmpbuf); 101 102 for (i=0;i<table->num_idxs;i++) { 103 pidx = g_ptr_array_index (table->indices, i); 104 if (IS_JET4(mdb)) { 105 name_sz=read_pg_if_16(mdb, &cur_pos); 106 cur_pos += 2; 107 } else { 108 read_pg_if(mdb, &cur_pos, 0); 109 name_sz=mdb->pg_buf[cur_pos++]; 110 } 111 tmpbuf = g_malloc(name_sz); 112 read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz); 113 cur_pos += name_sz; 114 mdb_unicode2ascii(mdb, tmpbuf, name_sz, pidx->name, MDB_MAX_OBJ_NAME); 115 g_free(tmpbuf); 116 //fprintf(stderr, "index name %s\n", pidx->name); 117 } 118 119 mdb_read_alt_pg(mdb, entry->table_pg); 120 mdb_read_pg(mdb, index_start_pg); 121 cur_pos = table->index_start; 122 idx_num=0; 123 for (i=0;i<table->num_real_idxs;i++) { 124 if (IS_JET4(mdb)) cur_pos += 4; 125 do { 126 pidx = g_ptr_array_index (table->indices, idx_num++); 127 } while (pidx && pidx->index_type==2); 128 129 /* if there are more real indexes than index entries left after 130 removing type 2's decrement real indexes and continue. Happens 131 on Northwind Orders table. 132 */ 133 if (!pidx) { 134 table->num_real_idxs--; 135 continue; 136 } 137 138 pidx->num_rows = mdb_get_int32(mdb->alt_pg_buf, 139 fmt->tab_cols_start_offset + 140 (i*fmt->tab_ridx_entry_size)); 141 142 key_num=0; 143 for (j=0;j<MDB_MAX_IDX_COLS;j++) { 144 col_num=read_pg_if_16(mdb,&cur_pos); 145 cur_pos += 2; 146 read_pg_if(mdb, &cur_pos, 0); 147 cur_pos++; 148 if (col_num == 0xFFFF) 149 continue; 150 /* set column number to a 1 based column number and store */ 151 pidx->key_col_num[key_num] = col_num + 1; 152 pidx->key_col_order[key_num] = 153 (mdb->pg_buf[cur_pos-1]) ? MDB_ASC : MDB_DESC; 154 key_num++; 155 } 156 pidx->num_keys = key_num; 157 158 cur_pos += 4; 159 pidx->first_pg = read_pg_if_32(mdb, &cur_pos); 160 cur_pos += 4; 161 read_pg_if(mdb, &cur_pos, 0); 162 pidx->flags = mdb->pg_buf[cur_pos++]; 163 if (IS_JET4(mdb)) cur_pos += 9; 164 } 165 return NULL; 166 } 167 void 168 mdb_index_hash_text(guchar *text, guchar *hash) 169 { 170 unsigned int k; 171 172 for (k=0;k<strlen(text);k++) { 173 hash[k] = idx_to_text[text[k]]; 174 if (!(hash[k])) fprintf(stderr, 175 "No translation available for %02x %d\n", 176 text[k],text[k]); 177 } 178 hash[strlen(text)]=0; 179 } 180 /* 181 * reverse the order of the column for hashing 182 */ 183 void 184 mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest) 185 { 186 int i, j = 0; 187 188 for (i = sz-1; i >= 0; i--) { 189 dest[j++] = src[i]; 190 } 191 } 192 void 193 mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg) 194 { 195 //guint32 cache_int; 196 unsigned char *c; 197 198 switch (col->col_type) { 199 case MDB_TEXT: 200 mdb_index_hash_text(sarg->value.s, idx_sarg->value.s); 201 break; 202 203 case MDB_LONGINT: 204 idx_sarg->value.i = GUINT32_SWAP_LE_BE(sarg->value.i); 205 //cache_int = sarg->value.i * -1; 206 c = (unsigned char *) &(idx_sarg->value.i); 207 c[0] |= 0x80; 208 //printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]); 209 break; 210 211 case MDB_INT: 212 break; 213 214 default: 215 break; 216 } 217 } 218 #if 0 219 int 220 mdb_index_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSarg *sarg, int offset, int len) 221 { 222 char tmpbuf[256]; 223 int lastchar; 224 225 switch (col->col_type) { 226 case MDB_BYTE: 227 return mdb_test_int(sarg, mdb_pg_get_byte(mdb, offset)); 228 break; 229 case MDB_INT: 230 return mdb_test_int(sarg, mdb_pg_get_int16(mdb, offset)); 231 break; 232 case MDB_LONGINT: 233 return mdb_test_int(sarg, mdb_pg_get_int32(mdb, offset)); 234 break; 235 case MDB_TEXT: 236 strncpy(tmpbuf, &mdb->pg_buf[offset],255); 237 lastchar = len > 255 ? 255 : len; 238 tmpbuf[lastchar]='\0'; 239 return mdb_test_string(sarg, tmpbuf); 240 default: 241 fprintf(stderr, "Calling mdb_test_sarg on unknown type. Add code to mdb_test_sarg() for type %d\n",col->col_type); 242 break; 243 } 244 return 1; 245 } 246 #endif 247 int 248 mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, unsigned char *buf, int len) 249 { 250 unsigned int i, j; 251 MdbColumn *col; 252 MdbTableDef *table = idx->table; 253 MdbSarg *idx_sarg; 254 MdbSarg *sarg; 255 MdbField field; 256 MdbSargNode node; 257 //int c_offset = 0, 258 int c_len; 259 260 //fprintf(stderr,"mdb_index_test_sargs called on "); 261 //for (i=0;i<len;i++) 262 //fprintf(stderr,"%02x ",buf[i]); //mdb->pg_buf[offset+i]); 263 //fprintf(stderr,"\n"); 264 for (i=0;i<idx->num_keys;i++) { 265 //c_offset++; /* the per column null indicator/flags */ 266 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); 267 /* 268 * This will go away eventually 269 */ 270 if (col->col_type==MDB_TEXT) { 271 //c_len = strlen(&mdb->pg_buf[offset + c_offset]); 272 c_len = strlen(buf); 273 } else { 274 c_len = col->col_size; 275 //fprintf(stderr,"Only text types currently supported. How did we get here?\n"); 276 } 277 /* 278 * If we have no cached index values for this column, 279 * create them. 280 */ 281 if (col->num_sargs && !col->idx_sarg_cache) { 282 col->idx_sarg_cache = g_ptr_array_new(); 283 for (j=0;j<col->num_sargs;j++) { 284 sarg = g_ptr_array_index (col->sargs, j); 285 idx_sarg = g_memdup(sarg,sizeof(MdbSarg)); 286 //printf("calling mdb_index_cache_sarg\n"); 287 mdb_index_cache_sarg(col, sarg, idx_sarg); 288 g_ptr_array_add(col->idx_sarg_cache, idx_sarg); 289 } 290 } 291 292 for (j=0;j<col->num_sargs;j++) { 293 sarg = g_ptr_array_index (col->idx_sarg_cache, j); 294 /* XXX - kludge */ 295 node.op = sarg->op; 296 node.value = sarg->value; 297 //field.value = &mdb->pg_buf[offset + c_offset]; 298 field.value = buf; 299 field.siz = c_len; 300 field.is_null = FALSE; 301 if (!mdb_test_sarg(mdb, col, &node, &field)) { 302 /* sarg didn't match, no sense going on */ 303 return 0; 304 } 305 } 306 } 307 return 1; 308 } 309 /* 310 * pack the pages bitmap 311 */ 312 int 313 mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg) 314 { 315 int mask_bit = 0; 316 int mask_pos = 0x16; 317 int mask_byte = 0; 318 int elem = 0; 319 int len, start, i; 320 321 start = ipg->idx_starts[elem++]; 322 323 while (start) { 324 //fprintf(stdout, "elem %d is %d\n", elem, ipg->idx_starts[elem]); 325 len = ipg->idx_starts[elem] - start; 326 //fprintf(stdout, "len is %d\n", len); 327 for (i=0; i < len; i++) { 328 mask_bit++; 329 if (mask_bit==8) { 330 mask_bit=0; 331 mdb->pg_buf[mask_pos++] = mask_byte; 332 mask_byte = 0; 333 } 334 /* upon reaching the len, set the bit */ 335 } 336 mask_byte = (1 << mask_bit) | mask_byte; 337 //fprintf(stdout, "mask byte is %02x at %d\n", mask_byte, mask_pos); 338 start = ipg->idx_starts[elem++]; 339 } 340 /* flush the last byte if any */ 341 mdb->pg_buf[mask_pos++] = mask_byte; 342 /* remember to zero the rest of the bitmap */ 343 for (i = mask_pos; i < 0xf8; i++) { 344 mdb->pg_buf[mask_pos++] = 0; 345 } 346 return 0; 347 } 348 /* 349 * unpack the pages bitmap 350 */ 351 int 352 mdb_index_unpack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg) 353 { 354 int mask_bit = 0; 355 int mask_pos = 0x16; 356 int mask_byte; 357 int start = 0xf8; 358 int elem = 0; 359 int len = 0; 360 361 ipg->idx_starts[elem++]=start; 362 363 //fprintf(stdout, "Unpacking index page %lu\n", ipg->pg); 364 do { 365 len = 0; 366 do { 367 mask_bit++; 368 if (mask_bit==8) { 369 mask_bit=0; 370 mask_pos++; 371 } 372 mask_byte = mdb->pg_buf[mask_pos]; 373 len++; 374 } while (mask_pos <= 0xf8 && !((1 << mask_bit) & mask_byte)); 375 //fprintf(stdout, "%d %d %d %d\n", mask_pos, mask_bit, mask_byte, len); 376 377 start += len; 378 if (mask_pos < 0xf8) ipg->idx_starts[elem++]=start; 379 380 } while (mask_pos < 0xf8); 381 382 /* if we zero the next element, so we don't pick up the last pages starts*/ 383 ipg->idx_starts[elem]=0; 384 385 return elem; 386 } 387 /* 388 * find the next entry on a page (either index or leaf). Uses state information 389 * stored in the MdbIndexPage across calls. 390 */ 391 int 392 mdb_index_find_next_on_page(MdbHandle *mdb, MdbIndexPage *ipg) 393 { 394 if (!ipg->pg) return 0; 395 396 /* if this page has not been unpacked to it */ 397 if (!ipg->idx_starts[0]){ 398 //fprintf(stdout, "Unpacking page %d\n", ipg->pg); 399 mdb_index_unpack_bitmap(mdb, ipg); 400 } 401 402 403 if (ipg->idx_starts[ipg->start_pos + 1]==0) return 0; 404 ipg->len = ipg->idx_starts[ipg->start_pos+1] - ipg->idx_starts[ipg->start_pos]; 405 ipg->start_pos++; 406 //fprintf(stdout, "Start pos %d\n", ipg->start_pos); 407 408 return ipg->len; 409 } 410 void mdb_index_page_reset(MdbIndexPage *ipg) 411 { 412 ipg->offset = 0xf8; /* start byte of the index entries */ 413 ipg->start_pos=0; 414 ipg->len = 0; 415 ipg->idx_starts[0]=0; 416 } 417 void mdb_index_page_init(MdbIndexPage *ipg) 418 { 419 memset(ipg, 0, sizeof(MdbIndexPage)); 420 mdb_index_page_reset(ipg); 421 } 422 /* 423 * find the next leaf page if any given a chain. Assumes any exhausted leaf 424 * pages at the end of the chain have been peeled off before the call. 425 */ 426 MdbIndexPage * 427 mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) 428 { 429 MdbIndexPage *ipg, *newipg; 430 guint32 pg; 431 guint passed = 0; 432 433 ipg = mdb_index_read_bottom_pg(mdb, idx, chain); 434 435 /* 436 * If we are at the first page deep and it's not an index page then 437 * we are simply done. (there is no page to find 438 */ 439 440 if (mdb->pg_buf[0]==MDB_PAGE_LEAF) { 441 /* Indexes can have leaves at the end that don't appear 442 * in the upper tree, stash the last index found so 443 * we can follow it at the end. */ 444 chain->last_leaf_found = ipg->pg; 445 return ipg; 446 } 447 448 /* 449 * apply sargs here, currently we don't 450 */ 451 do { 452 ipg->len = 0; 453 //printf("finding next on pg %lu\n", ipg->pg); 454 if (!mdb_index_find_next_on_page(mdb, ipg)) { 455 //printf("find_next_on_page returned 0\n"); 456 return 0; 457 } 458 pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 3); 459 //printf("Looking at pg %lu at %lu %d\n", pg, ipg->offset, ipg->len); 460 ipg->offset += ipg->len; 461 462 /* 463 * add to the chain and call this function 464 * recursively. 465 */ 466 newipg = mdb_chain_add_page(mdb, chain, pg); 467 newipg = mdb_find_next_leaf(mdb, idx, chain); 468 //printf("returning pg %lu\n",newipg->pg); 469 return newipg; 470 } while (!passed); 471 /* no more pages */ 472 return NULL; 473 474 } 475 MdbIndexPage * 476 mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg) 477 { 478 MdbIndexPage *ipg; 479 480 chain->cur_depth++; 481 if (chain->cur_depth > MDB_MAX_INDEX_DEPTH) { 482 fprintf(stderr,"Error! maximum index depth of %d exceeded. This is probably due to a programming bug, If you are confident that your indexes really are this deep, adjust MDB_MAX_INDEX_DEPTH in mdbtools.h and recompile.\n", MDB_MAX_INDEX_DEPTH); 483 exit(1); 484 } 485 ipg = &(chain->pages[chain->cur_depth - 1]); 486 mdb_index_page_init(ipg); 487 ipg->pg = pg; 488 489 return ipg; 490 } 491 /* 492 * returns the bottom page of the IndexChain, if IndexChain is empty it 493 * initializes it by reading idx->first_pg (the root page) 494 */ 495 MdbIndexPage * 496 mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) 497 { 498 MdbIndexPage *ipg; 499 500 /* 501 * if it's new use the root index page (idx->first_pg) 502 */ 503 if (!chain->cur_depth) { 504 ipg = &(chain->pages[0]); 505 mdb_index_page_init(ipg); 506 chain->cur_depth = 1; 507 ipg->pg = idx->first_pg; 508 if (!(ipg = mdb_find_next_leaf(mdb, idx, chain))) 509 return 0; 510 } else { 511 ipg = &(chain->pages[chain->cur_depth - 1]); 512 ipg->len = 0; 513 } 514 515 mdb_read_pg(mdb, ipg->pg); 516 517 return ipg; 518 } 519 /* 520 * unwind the stack and search for new leaf node 521 */ 522 MdbIndexPage * 523 mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) 524 { 525 MdbIndexPage *ipg; 526 527 //printf("page %lu finished\n",ipg->pg); 528 if (chain->cur_depth==1) { 529 //printf("cur_depth == 1 we're out\n"); 530 return NULL; 531 } 532 /* 533 * unwind the stack until we find something or reach 534 * the top. 535 */ 536 ipg = NULL; 537 while (chain->cur_depth>1 && ipg==NULL) { 538 //printf("chain depth %d\n", chain->cur_depth); 539 chain->cur_depth--; 540 ipg = mdb_find_next_leaf(mdb, idx, chain); 541 if (ipg) mdb_index_find_next_on_page(mdb, ipg); 542 } 543 if (chain->cur_depth==1) { 544 //printf("last leaf %lu\n", chain->last_leaf_found); 545 return NULL; 546 } 547 return ipg; 548 } 549 /* 550 * the main index function. 551 * caller provides an index chain which is the current traversal of index 552 * pages from the root page to the leaf. Initially passed as blank, 553 * mdb_index_find_next will store it's state information here. Each invocation 554 * then picks up where the last one left off, allowing us to scroll through 555 * the index one by one. 556 * 557 * Sargs are applied here but also need to be applied on the whole row b/c 558 * text columns may return false positives due to hashing and non-index 559 * columns with sarg values can't be tested here. 560 */ 561 int 562 mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 *pg, guint16 *row) 563 { 564 MdbIndexPage *ipg; 565 int passed = 0; 566 int idx_sz; 567 int idx_start = 0; 568 MdbColumn *col; 569 570 ipg = mdb_index_read_bottom_pg(mdb, idx, chain); 571 572 /* 573 * loop while the sargs don't match 574 */ 575 do { 576 ipg->len = 0; 577 /* 578 * if no more rows on this leaf, try to find a new leaf 579 */ 580 if (!mdb_index_find_next_on_page(mdb, ipg)) { 581 if (!chain->clean_up_mode) { 582 if (!(ipg = mdb_index_unwind(mdb, idx, chain))) 583 chain->clean_up_mode = 1; 584 } 585 if (chain->clean_up_mode) { 586 //fprintf(stdout,"in cleanup mode\n"); 587 588 if (!chain->last_leaf_found) return 0; 589 mdb_read_pg(mdb, chain->last_leaf_found); 590 chain->last_leaf_found = mdb_pg_get_int24(mdb, 0x0c); 591 //printf("next leaf %lu\n", chain->last_leaf_found); 592 mdb_read_pg(mdb, chain->last_leaf_found); 593 /* reuse the chain for cleanup mode */ 594 chain->cur_depth = 1; 595 ipg = &chain->pages[0]; 596 mdb_index_page_init(ipg); 597 ipg->pg = chain->last_leaf_found; 598 //printf("next on page %d\n", 599 if (!mdb_index_find_next_on_page(mdb, ipg)) 600 return 0; 601 } 602 } 603 *row = mdb->pg_buf[ipg->offset + ipg->len - 1]; 604 *pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4); 605 //printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len); 606 col=g_ptr_array_index(idx->table->columns,idx->key_col_num[0]-1); 607 idx_sz = mdb_col_fixed_size(col); 608 /* handle compressed indexes, single key indexes only? */ 609 if (idx->num_keys==1 && idx_sz>0 && ipg->len - 4 < idx_sz) { 610 //printf("short index found\n"); 611 //buffer_dump(ipg->cache_value, 0, idx_sz); 612 memcpy(&ipg->cache_value[idx_sz - (ipg->len - 4)], &mdb->pg_buf[ipg->offset], ipg->len); 613 //buffer_dump(ipg->cache_value, 0, idx_sz); 614 } else { 615 idx_start = ipg->offset + (ipg->len - 4 - idx_sz); 616 memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz); 617 } 618 619 //idx_start = ipg->offset + (ipg->len - 4 - idx_sz); 620 passed = mdb_index_test_sargs(mdb, idx, ipg->cache_value, idx_sz); 621 622 ipg->offset += ipg->len; 623 } while (!passed); 624 625 //fprintf(stdout,"len = %d pos %d\n", ipg->len, ipg->mask_pos); 626 //buffer_dump(mdb->pg_buf, ipg->offset, ipg->offset+ipg->len-1); 627 628 return ipg->len; 629 } 630 /* 631 * XXX - FIX ME 632 * This function is grossly inefficient. It scans the entire index building 633 * an IndexChain to a specific row. We should be checking the index pages 634 * for matches against the indexed fields to find the proper leaf page, but 635 * getting it working first and then make it fast! 636 */ 637 int 638 mdb_index_find_row(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 pg, guint16 row) 639 { 640 MdbIndexPage *ipg; 641 int passed = 0; 642 guint32 datapg; 643 guint16 datarow; 644 645 ipg = mdb_index_read_bottom_pg(mdb, idx, chain); 646 647 do { 648 ipg->len = 0; 649 /* 650 * if no more rows on this leaf, try to find a new leaf 651 */ 652 if (!mdb_index_find_next_on_page(mdb, ipg)) { 653 /* back to top? We're done */ 654 if (chain->cur_depth==1) 655 return 0; 656 657 /* 658 * unwind the stack until we find something or reach 659 * the top. 660 */ 661 while (chain->cur_depth>1) { 662 chain->cur_depth--; 663 if (!(ipg = mdb_find_next_leaf(mdb, idx, chain))) 664 return 0; 665 mdb_index_find_next_on_page(mdb, ipg); 666 } 667 if (chain->cur_depth==1) 668 return 0; 669 } 670 /* test row and pg */ 671 datarow = mdb->pg_buf[ipg->offset + ipg->len - 1]; 672 datapg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4); 673 674 if (datapg == pg && datarow == row) { 675 passed = 1; 676 } 677 ipg->offset += ipg->len; 678 } while (!passed); 679 680 /* index chain from root to leaf should now be in "chain" */ 681 return 1; 682 } 683 684 void mdb_index_walk(MdbTableDef *table, MdbIndex *idx) 685 { 686 MdbHandle *mdb = table->entry->mdb; 687 int cur_pos = 0; 688 unsigned char marker; 689 MdbColumn *col; 690 unsigned int i; 691 692 if (idx->num_keys!=1) return; 693 694 mdb_read_pg(mdb, idx->first_pg); 695 cur_pos = 0xf8; 696 697 for (i=0;i<idx->num_keys;i++) { 698 marker = mdb->pg_buf[cur_pos++]; 699 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); 700 //printf("column %d coltype %d col_size %d (%d)\n",i,col->col_type, mdb_col_fixed_size(col), col->col_size); 701 } 702 } 703 void 704 mdb_index_dump(MdbTableDef *table, MdbIndex *idx) 705 { 706 unsigned int i; 707 MdbColumn *col; 708 709 fprintf(stdout,"index number %d\n", idx->index_num); 710 fprintf(stdout,"index name %s\n", idx->name); 711 fprintf(stdout,"index first page %d\n", idx->first_pg); 712 fprintf(stdout,"index rows %d\n", idx->num_rows); 713 if (idx->index_type==1) fprintf(stdout,"index is a primary key\n"); 714 for (i=0;i<idx->num_keys;i++) { 715 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); 716 fprintf(stdout,"Column %s(%d) Sorted %s Unique: %s\n", 717 col->name, 718 idx->key_col_num[i], 719 idx->key_col_order[i]==MDB_ASC ? "ascending" : "descending", 720 idx->flags & MDB_IDX_UNIQUE ? "Yes" : "No" 721 ); 722 } 723 mdb_index_walk(table, idx); 724 } 725 /* 726 * compute_cost tries to assign a cost to a given index using the sargs 727 * available in this query. 728 * 729 * Indexes with no matching sargs are assigned 0 730 * Unique indexes are preferred over non-uniques 731 * Operator preference is equal, like, isnull, others 732 */ 733 int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx) 734 { 735 unsigned int i; 736 MdbColumn *col; 737 MdbSarg *sarg = NULL; 738 int not_all_equal = 0; 739 740 if (!idx->num_keys) return 0; 741 if (idx->num_keys > 1) { 742 for (i=0;i<idx->num_keys;i++) { 743 col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); 744 if (col->sargs) sarg = g_ptr_array_index (col->sargs, 0); 745 if (!sarg || sarg->op != MDB_EQUAL) not_all_equal++; 746 } 747 } 748 749 col=g_ptr_array_index(table->columns,idx->key_col_num[0]-1); 750 /* 751 * if this is the first key column and there are no sargs, 752 * then this index is useless. 753 */ 754 if (!col->num_sargs) return 0; 755 756 sarg = g_ptr_array_index (col->sargs, 0); 757 758 /* 759 * a like with a wild card first is useless as a sarg */ 760 if (sarg->op == MDB_LIKE && sarg->value.s[0]=='%') 761 return 0; 762 763 /* 764 * this needs a lot of tweaking. 765 */ 766 if (idx->flags & MDB_IDX_UNIQUE) { 767 if (idx->num_keys == 1) { 768 //printf("op is %d\n", sarg->op); 769 switch (sarg->op) { 770 case MDB_EQUAL: 771 return 1; break; 772 case MDB_LIKE: 773 return 4; break; 774 case MDB_ISNULL: 775 return 12; break; 776 default: 777 return 8; break; 778 } 779 } else { 780 switch (sarg->op) { 781 case MDB_EQUAL: 782 if (not_all_equal) return 2; 783 else return 1; 784 break; 785 case MDB_LIKE: 786 return 6; break; 787 case MDB_ISNULL: 788 return 12; break; 789 default: 790 return 9; break; 791 } 792 } 793 } else { 794 if (idx->num_keys == 1) { 795 switch (sarg->op) { 796 case MDB_EQUAL: 797 return 2; break; 798 case MDB_LIKE: 799 return 5; break; 800 case MDB_ISNULL: 801 return 12; break; 802 default: 803 return 10; break; 804 } 805 } else { 806 switch (sarg->op) { 807 case MDB_EQUAL: 808 if (not_all_equal) return 3; 809 else return 2; 810 break; 811 case MDB_LIKE: 812 return 7; break; 813 case MDB_ISNULL: 814 return 12; break; 815 default: 816 return 11; break; 817 } 818 } 819 } 820 return 0; 821 } 822 /* 823 * choose_index runs mdb_index_compute_cost for each available index and picks 824 * the best. 825 * 826 * Returns strategy to use (table scan, or index scan) 827 */ 828 MdbStrategy 829 mdb_choose_index(MdbTableDef *table, int *choice) 830 { 831 unsigned int i; 832 MdbIndex *idx; 833 int cost = 0; 834 int least = 99; 835 836 *choice = -1; 837 for (i=0;i<table->num_idxs;i++) { 838 idx = g_ptr_array_index (table->indices, i); 839 cost = mdb_index_compute_cost(table, idx); 840 //printf("cost for %s is %d\n", idx->name, cost); 841 if (cost && cost < least) { 842 least = cost; 843 *choice = i; 844 } 845 } 846 /* and the winner is: *choice */ 847 if (least==99) return MDB_TABLE_SCAN; 848 return MDB_INDEX_SCAN; 849 } 850 void 851 mdb_index_scan_init(MdbHandle *mdb, MdbTableDef *table) 852 { 853 int i; 854 855 if (mdb_get_option(MDB_USE_INDEX) && mdb_choose_index(table, &i) == MDB_INDEX_SCAN) { 856 table->strategy = MDB_INDEX_SCAN; 857 table->scan_idx = g_ptr_array_index (table->indices, i); 858 table->chain = g_malloc0(sizeof(MdbIndexChain)); 859 table->mdbidx = mdb_clone_handle(mdb); 860 mdb_read_pg(table->mdbidx, table->scan_idx->first_pg); 861 //printf("best index is %s\n",table->scan_idx->name); 862 } 863 //printf("TABLE SCAN? %d\n", table->strategy); 864 } 865 void 866 mdb_index_scan_free(MdbTableDef *table) 867 { 868 if (table->chain) { 869 g_free(table->chain); 870 table->chain = NULL; 871 } 872 if (table->mdbidx) { 873 mdb_close(table->mdbidx); 874 table->mdbidx = NULL; 875 } 876 } 877 878 void mdb_free_indices(GPtrArray *indices) 879 { 880 unsigned int i; 881 882 if (!indices) return; 883 for (i=0; i<indices->len; i++) 884 g_free (g_ptr_array_index(indices, i)); 885 g_ptr_array_free(indices, TRUE); 886 } 887