1 /* MDB Tools - A library for reading MS Access database file
2  * Copyright (C) 2000-2004 Brian Bruns
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA  02110-1301, USA.
18  */
19 
20 #include "mdbtools.h"
21 
22 #ifdef DMALLOC
23 #include "dmalloc.h"
24 #endif
25 
26 MdbIndexPage *mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain);
27 MdbIndexPage *mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg);
28 
29 char idx_to_text[] = {
30 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0-7     0x00-0x07 */
31 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8-15    0x09-0x0f */
32 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16-23   0x10-0x17 */
33 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24-31   0x19-0x1f */
34 ' ',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 32-39   0x20-0x27 */
35 0x00, 0x00, 0x00, 0x00, 0x00, ' ',  ' ',  0x00, /* 40-47   0x29-0x2f */
36 'V',  'W',  'X',  'Y',  'Z',  '[',  '\\', ']',  /* 48-55   0x30-0x37 */
37 '^',  '_',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56-63   0x39-0x3f */
38 0x00, '`',  'a',  'b',  'd',  'f',  'g',  'h',  /* 64-71   0x40-0x47 */
39 'i',  'j',  'k',  'l',  'm',  'o',  'p',  'r',  /* 72-79   0x49-0x4f  H */
40 's',  't',  'u',  'v',  'w',  'x',  'z',  '{',  /* 80-87   0x50-0x57  P */
41 '|',  '}',  '~',  '5',  '6',  '7',  '8',  '9',  /* 88-95   0x59-0x5f */
42 0x00, '`',  'a',  'b',  'd',  'f',  'g',  'h',  /* 96-103  0x60-0x67 */
43 'i',  'j',  'k',  'l',  'm',  'o',  'p',  'r',  /* 014-111 0x69-0x6f  h */
44 's',  't',  'u',  'v',  'w',  'x',  'z',  '{',  /* 112-119 0x70-0x77  p */
45 '|',  '}',  '~',  0x00, 0x00, 0x00, 0x00, 0x00, /* 120-127 0x78-0x7f */
46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 0x80-0x87 */
47 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
48 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
50 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */
52 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
53 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */
54 0x00, 0x00, 0x00, 0x00, 0x00, '`',  0x00, 0x00, /* 0xc0-0xc7 */
55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
56 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
57 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
58 0x00, '`',  0x00, '`',  '`',  '`',  0x00, 0x00, /* 0xe0-0xe7 */
59 'f',  'f',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
60 0x00, 0x00, 0x00, 'r',  0x00, 0x00, 'r',  0x00, /* 0xf0-0xf7 */
61 0x81, 0x00, 0x00, 0x00, 'x',  0x00, 0x00, 0x00, /* 0xf8-0xff */
62 };
63 
64 
65 GPtrArray *
mdb_read_indices(MdbTableDef * table)66 mdb_read_indices(MdbTableDef *table)
67 {
68 	MdbCatalogEntry *entry = table->entry;
69 	MdbHandle *mdb = entry->mdb;
70 	MdbFormatConstants *fmt = mdb->fmt;
71 	MdbIndex *pidx;
72 	unsigned int i, j;
73 	int idx_num, key_num, col_num;
74 	int cur_pos, name_sz, idx2_sz, type_offset;
75 	int index_start_pg = mdb->cur_pg;
76 	gchar *tmpbuf;
77 
78         table->indices = g_ptr_array_new();
79 
80         if (IS_JET4(mdb)) {
81 		cur_pos = table->index_start + 52 * table->num_real_idxs;
82 		idx2_sz = 28;
83 		type_offset = 23;
84 	} else {
85 		cur_pos = table->index_start + 39 * table->num_real_idxs;
86 		idx2_sz = 20;
87 		type_offset = 19;
88 	}
89 
90 	tmpbuf = (gchar *) g_malloc(idx2_sz);
91 	for (i=0;i<table->num_idxs;i++) {
92 		read_pg_if_n(mdb, tmpbuf, &cur_pos, idx2_sz);
93 		cur_pos += idx2_sz;
94 		pidx = (MdbIndex *) g_malloc0(sizeof(MdbIndex));
95 		pidx->table = table;
96 		pidx->index_num = mdb_get_int16(tmpbuf, 4);
97 		pidx->index_type = tmpbuf[type_offset];
98 		g_ptr_array_add(table->indices, pidx);
99 	}
100 	g_free(tmpbuf);
101 
102 	for (i=0;i<table->num_idxs;i++) {
103 		pidx = g_ptr_array_index (table->indices, i);
104 		if (IS_JET4(mdb)) {
105 			name_sz=read_pg_if_16(mdb, &cur_pos);
106 			cur_pos += 2;
107 		} else {
108 			read_pg_if(mdb, &cur_pos, 0);
109 			name_sz=mdb->pg_buf[cur_pos++];
110 		}
111 		tmpbuf = g_malloc(name_sz);
112 		read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz);
113 		cur_pos += name_sz;
114 		mdb_unicode2ascii(mdb, tmpbuf, name_sz, pidx->name, MDB_MAX_OBJ_NAME);
115 		g_free(tmpbuf);
116 		//fprintf(stderr, "index name %s\n", pidx->name);
117 	}
118 
119 	mdb_read_alt_pg(mdb, entry->table_pg);
120 	mdb_read_pg(mdb, index_start_pg);
121 	cur_pos = table->index_start;
122 	idx_num=0;
123 	for (i=0;i<table->num_real_idxs;i++) {
124 		if (IS_JET4(mdb)) cur_pos += 4;
125 		do {
126 			pidx = g_ptr_array_index (table->indices, idx_num++);
127 		} while (pidx && pidx->index_type==2);
128 
129 		/* if there are more real indexes than index entries left after
130 		   removing type 2's decrement real indexes and continue.  Happens
131 		   on Northwind Orders table.
132 		*/
133 		if (!pidx) {
134 			table->num_real_idxs--;
135 			continue;
136 		}
137 
138 		pidx->num_rows = mdb_get_int32(mdb->alt_pg_buf,
139 				fmt->tab_cols_start_offset +
140 				(i*fmt->tab_ridx_entry_size));
141 
142 		key_num=0;
143 		for (j=0;j<MDB_MAX_IDX_COLS;j++) {
144 			col_num=read_pg_if_16(mdb,&cur_pos);
145 			cur_pos += 2;
146 			read_pg_if(mdb, &cur_pos, 0);
147 			cur_pos++;
148 			if (col_num == 0xFFFF)
149 				continue;
150 			/* set column number to a 1 based column number and store */
151 			pidx->key_col_num[key_num] = col_num + 1;
152 			pidx->key_col_order[key_num] =
153 				(mdb->pg_buf[cur_pos-1]) ? MDB_ASC : MDB_DESC;
154 			key_num++;
155 		}
156 		pidx->num_keys = key_num;
157 
158 		cur_pos += 4;
159 		pidx->first_pg = read_pg_if_32(mdb, &cur_pos);
160 		cur_pos += 4;
161 		read_pg_if(mdb, &cur_pos, 0);
162 		pidx->flags = mdb->pg_buf[cur_pos++];
163 		if (IS_JET4(mdb)) cur_pos += 9;
164 	}
165 	return NULL;
166 }
167 void
mdb_index_hash_text(guchar * text,guchar * hash)168 mdb_index_hash_text(guchar *text, guchar *hash)
169 {
170 	unsigned int k;
171 
172 	for (k=0;k<strlen(text);k++) {
173 		hash[k] = idx_to_text[text[k]];
174 		if (!(hash[k])) fprintf(stderr,
175 				"No translation available for %02x %d\n",
176 				text[k],text[k]);
177 	}
178 	hash[strlen(text)]=0;
179 }
180 /*
181  * reverse the order of the column for hashing
182  */
183 void
mdb_index_swap_n(unsigned char * src,int sz,unsigned char * dest)184 mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest)
185 {
186 	int i, j = 0;
187 
188 	for (i = sz-1; i >= 0; i--) {
189 		dest[j++] = src[i];
190 	}
191 }
192 void
mdb_index_cache_sarg(MdbColumn * col,MdbSarg * sarg,MdbSarg * idx_sarg)193 mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg)
194 {
195 	//guint32 cache_int;
196 	unsigned char *c;
197 
198 	switch (col->col_type) {
199 		case MDB_TEXT:
200 		mdb_index_hash_text(sarg->value.s, idx_sarg->value.s);
201 		break;
202 
203 		case MDB_LONGINT:
204 		idx_sarg->value.i = GUINT32_SWAP_LE_BE(sarg->value.i);
205 		//cache_int = sarg->value.i * -1;
206 		c = (unsigned char *) &(idx_sarg->value.i);
207 		c[0] |= 0x80;
208 		//printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]);
209 		break;
210 
211 		case MDB_INT:
212 		break;
213 
214 		default:
215 		break;
216 	}
217 }
218 #if 0
219 int
220 mdb_index_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSarg *sarg, int offset, int len)
221 {
222 char tmpbuf[256];
223 int lastchar;
224 
225 	switch (col->col_type) {
226 		case MDB_BYTE:
227 			return mdb_test_int(sarg, mdb_pg_get_byte(mdb, offset));
228 			break;
229 		case MDB_INT:
230 			return mdb_test_int(sarg, mdb_pg_get_int16(mdb, offset));
231 			break;
232 		case MDB_LONGINT:
233 			return mdb_test_int(sarg, mdb_pg_get_int32(mdb, offset));
234 			break;
235 		case MDB_TEXT:
236 			strncpy(tmpbuf, &mdb->pg_buf[offset],255);
237 			lastchar = len > 255 ? 255 : len;
238 			tmpbuf[lastchar]='\0';
239 			return mdb_test_string(sarg, tmpbuf);
240 		default:
241 			fprintf(stderr, "Calling mdb_test_sarg on unknown type.  Add code to mdb_test_sarg() for type %d\n",col->col_type);
242 			break;
243 	}
244 	return 1;
245 }
246 #endif
247 int
mdb_index_test_sargs(MdbHandle * mdb,MdbIndex * idx,unsigned char * buf,int len)248 mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, unsigned char *buf, int len)
249 {
250 	unsigned int i, j;
251 	MdbColumn *col;
252 	MdbTableDef *table = idx->table;
253 	MdbSarg *idx_sarg;
254 	MdbSarg *sarg;
255 	MdbField field;
256 	MdbSargNode node;
257 	//int c_offset = 0,
258 	int c_len;
259 
260 	//fprintf(stderr,"mdb_index_test_sargs called on ");
261 	//for (i=0;i<len;i++)
262 		//fprintf(stderr,"%02x ",buf[i]); //mdb->pg_buf[offset+i]);
263 	//fprintf(stderr,"\n");
264 	for (i=0;i<idx->num_keys;i++) {
265 		//c_offset++; /* the per column null indicator/flags */
266 		col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
267 		/*
268 		 * This will go away eventually
269 		 */
270 		if (col->col_type==MDB_TEXT) {
271 			//c_len = strlen(&mdb->pg_buf[offset + c_offset]);
272 			c_len = strlen(buf);
273 		} else {
274 			c_len = col->col_size;
275 			//fprintf(stderr,"Only text types currently supported.  How did we get here?\n");
276 		}
277 		/*
278 		 * If we have no cached index values for this column,
279 		 * create them.
280 		 */
281 		if (col->num_sargs && !col->idx_sarg_cache) {
282 			col->idx_sarg_cache = g_ptr_array_new();
283 			for (j=0;j<col->num_sargs;j++) {
284 				sarg = g_ptr_array_index (col->sargs, j);
285 				idx_sarg = g_memdup(sarg,sizeof(MdbSarg));
286 				//printf("calling mdb_index_cache_sarg\n");
287 				mdb_index_cache_sarg(col, sarg, idx_sarg);
288 				g_ptr_array_add(col->idx_sarg_cache, idx_sarg);
289 			}
290 		}
291 
292 		for (j=0;j<col->num_sargs;j++) {
293 			sarg = g_ptr_array_index (col->idx_sarg_cache, j);
294 			/* XXX - kludge */
295 			node.op = sarg->op;
296 			node.value = sarg->value;
297 			//field.value = &mdb->pg_buf[offset + c_offset];
298 			field.value = buf;
299 		       	field.siz = c_len;
300 		       	field.is_null = FALSE;
301 			if (!mdb_test_sarg(mdb, col, &node, &field)) {
302 				/* sarg didn't match, no sense going on */
303 				return 0;
304 			}
305 		}
306 	}
307 	return 1;
308 }
309 /*
310  * pack the pages bitmap
311  */
312 int
mdb_index_pack_bitmap(MdbHandle * mdb,MdbIndexPage * ipg)313 mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
314 {
315 	int mask_bit = 0;
316 	int mask_pos = 0x16;
317 	int mask_byte = 0;
318 	int elem = 0;
319 	int len, start, i;
320 
321 	start = ipg->idx_starts[elem++];
322 
323 	while (start) {
324 		//fprintf(stdout, "elem %d is %d\n", elem, ipg->idx_starts[elem]);
325 		len = ipg->idx_starts[elem] - start;
326 		//fprintf(stdout, "len is %d\n", len);
327 		for (i=0; i < len; i++) {
328 			mask_bit++;
329 			if (mask_bit==8) {
330 				mask_bit=0;
331 				mdb->pg_buf[mask_pos++] = mask_byte;
332 				mask_byte = 0;
333 			}
334 			/* upon reaching the len, set the bit */
335 		}
336 		mask_byte = (1 << mask_bit) | mask_byte;
337 		//fprintf(stdout, "mask byte is %02x at %d\n", mask_byte, mask_pos);
338 		start = ipg->idx_starts[elem++];
339 	}
340 	/* flush the last byte if any */
341 	mdb->pg_buf[mask_pos++] = mask_byte;
342 	/* remember to zero the rest of the bitmap */
343 	for (i = mask_pos; i < 0xf8; i++) {
344 		mdb->pg_buf[mask_pos++] = 0;
345 	}
346 	return 0;
347 }
348 /*
349  * unpack the pages bitmap
350  */
351 int
mdb_index_unpack_bitmap(MdbHandle * mdb,MdbIndexPage * ipg)352 mdb_index_unpack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
353 {
354 	int mask_bit = 0;
355 	int mask_pos = 0x16;
356 	int mask_byte;
357 	int start = 0xf8;
358 	int elem = 0;
359 	int len = 0;
360 
361 	ipg->idx_starts[elem++]=start;
362 
363 	//fprintf(stdout, "Unpacking index page %lu\n", ipg->pg);
364 	do {
365 		len = 0;
366 		do {
367 			mask_bit++;
368 			if (mask_bit==8) {
369 				mask_bit=0;
370 				mask_pos++;
371 			}
372 			mask_byte = mdb->pg_buf[mask_pos];
373 			len++;
374 		} while (mask_pos <= 0xf8 && !((1 << mask_bit) & mask_byte));
375 		//fprintf(stdout, "%d %d %d %d\n", mask_pos, mask_bit, mask_byte, len);
376 
377 		start += len;
378 		if (mask_pos < 0xf8) ipg->idx_starts[elem++]=start;
379 
380 	} while (mask_pos < 0xf8);
381 
382 	/* if we zero the next element, so we don't pick up the last pages starts*/
383 	ipg->idx_starts[elem]=0;
384 
385 	return elem;
386 }
387 /*
388  * find the next entry on a page (either index or leaf). Uses state information
389  * stored in the MdbIndexPage across calls.
390  */
391 int
mdb_index_find_next_on_page(MdbHandle * mdb,MdbIndexPage * ipg)392 mdb_index_find_next_on_page(MdbHandle *mdb, MdbIndexPage *ipg)
393 {
394 	if (!ipg->pg) return 0;
395 
396 	/* if this page has not been unpacked to it */
397 	if (!ipg->idx_starts[0]){
398 		//fprintf(stdout, "Unpacking page %d\n", ipg->pg);
399 		mdb_index_unpack_bitmap(mdb, ipg);
400 	}
401 
402 
403 	if (ipg->idx_starts[ipg->start_pos + 1]==0) return 0;
404 	ipg->len = ipg->idx_starts[ipg->start_pos+1] - ipg->idx_starts[ipg->start_pos];
405 	ipg->start_pos++;
406 	//fprintf(stdout, "Start pos %d\n", ipg->start_pos);
407 
408 	return ipg->len;
409 }
mdb_index_page_reset(MdbIndexPage * ipg)410 void mdb_index_page_reset(MdbIndexPage *ipg)
411 {
412 	ipg->offset = 0xf8; /* start byte of the index entries */
413 	ipg->start_pos=0;
414 	ipg->len = 0;
415 	ipg->idx_starts[0]=0;
416 }
mdb_index_page_init(MdbIndexPage * ipg)417 void mdb_index_page_init(MdbIndexPage *ipg)
418 {
419 	memset(ipg, 0, sizeof(MdbIndexPage));
420 	mdb_index_page_reset(ipg);
421 }
422 /*
423  * find the next leaf page if any given a chain. Assumes any exhausted leaf
424  * pages at the end of the chain have been peeled off before the call.
425  */
426 MdbIndexPage *
mdb_find_next_leaf(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)427 mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
428 {
429 	MdbIndexPage *ipg, *newipg;
430 	guint32 pg;
431 	guint passed = 0;
432 
433 	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
434 
435 	/*
436 	 * If we are at the first page deep and it's not an index page then
437 	 * we are simply done. (there is no page to find
438 	 */
439 
440 	if (mdb->pg_buf[0]==MDB_PAGE_LEAF) {
441 		/* Indexes can have leaves at the end that don't appear
442 		 * in the upper tree, stash the last index found so
443 		 * we can follow it at the end.  */
444 		chain->last_leaf_found = ipg->pg;
445 		return ipg;
446 	}
447 
448 	/*
449 	 * apply sargs here, currently we don't
450 	 */
451 	do {
452 		ipg->len = 0;
453 		//printf("finding next on pg %lu\n", ipg->pg);
454 		if (!mdb_index_find_next_on_page(mdb, ipg)) {
455 			//printf("find_next_on_page returned 0\n");
456 			return 0;
457 		}
458 		pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 3);
459 		//printf("Looking at pg %lu at %lu %d\n", pg, ipg->offset, ipg->len);
460 		ipg->offset += ipg->len;
461 
462 		/*
463 		 * add to the chain and call this function
464 		 * recursively.
465 		 */
466 		newipg = mdb_chain_add_page(mdb, chain, pg);
467 		newipg = mdb_find_next_leaf(mdb, idx, chain);
468 		//printf("returning pg %lu\n",newipg->pg);
469 		return newipg;
470 	} while (!passed);
471 	/* no more pages */
472 	return NULL;
473 
474 }
475 MdbIndexPage *
mdb_chain_add_page(MdbHandle * mdb,MdbIndexChain * chain,guint32 pg)476 mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg)
477 {
478 	MdbIndexPage *ipg;
479 
480 	chain->cur_depth++;
481 	if (chain->cur_depth > MDB_MAX_INDEX_DEPTH) {
482 		fprintf(stderr,"Error! maximum index depth of %d exceeded.  This is probably due to a programming bug, If you are confident that your indexes really are this deep, adjust MDB_MAX_INDEX_DEPTH in mdbtools.h and recompile.\n", MDB_MAX_INDEX_DEPTH);
483 		exit(1);
484 	}
485 	ipg = &(chain->pages[chain->cur_depth - 1]);
486 	mdb_index_page_init(ipg);
487 	ipg->pg = pg;
488 
489 	return ipg;
490 }
491 /*
492  * returns the bottom page of the IndexChain, if IndexChain is empty it
493  * initializes it by reading idx->first_pg (the root page)
494  */
495 MdbIndexPage *
mdb_index_read_bottom_pg(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)496 mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
497 {
498 	MdbIndexPage *ipg;
499 
500 	/*
501 	 * if it's new use the root index page (idx->first_pg)
502 	 */
503 	if (!chain->cur_depth) {
504 		ipg = &(chain->pages[0]);
505 		mdb_index_page_init(ipg);
506 		chain->cur_depth = 1;
507 		ipg->pg = idx->first_pg;
508 		if (!(ipg = mdb_find_next_leaf(mdb, idx, chain)))
509 			return 0;
510 	} else {
511 		ipg = &(chain->pages[chain->cur_depth - 1]);
512 		ipg->len = 0;
513 	}
514 
515 	mdb_read_pg(mdb, ipg->pg);
516 
517 	return ipg;
518 }
519 /*
520  * unwind the stack and search for new leaf node
521  */
522 MdbIndexPage *
mdb_index_unwind(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)523 mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
524 {
525 	MdbIndexPage *ipg;
526 
527 	//printf("page %lu finished\n",ipg->pg);
528 	if (chain->cur_depth==1) {
529 		//printf("cur_depth == 1 we're out\n");
530 		return NULL;
531 	}
532 	/*
533 	* unwind the stack until we find something or reach
534 	* the top.
535 	*/
536 	ipg = NULL;
537 	while (chain->cur_depth>1 && ipg==NULL) {
538 		//printf("chain depth %d\n", chain->cur_depth);
539 		chain->cur_depth--;
540 		ipg = mdb_find_next_leaf(mdb, idx, chain);
541 		if (ipg) mdb_index_find_next_on_page(mdb, ipg);
542 	}
543 	if (chain->cur_depth==1) {
544 		//printf("last leaf %lu\n", chain->last_leaf_found);
545 		return NULL;
546 	}
547 	return ipg;
548 }
549 /*
550  * the main index function.
551  * caller provides an index chain which is the current traversal of index
552  * pages from the root page to the leaf.  Initially passed as blank,
553  * mdb_index_find_next will store it's state information here. Each invocation
554  * then picks up where the last one left off, allowing us to scroll through
555  * the index one by one.
556  *
557  * Sargs are applied here but also need to be applied on the whole row b/c
558  * text columns may return false positives due to hashing and non-index
559  * columns with sarg values can't be tested here.
560  */
561 int
mdb_index_find_next(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain,guint32 * pg,guint16 * row)562 mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 *pg, guint16 *row)
563 {
564 	MdbIndexPage *ipg;
565 	int passed = 0;
566 	int idx_sz;
567 	int idx_start = 0;
568 	MdbColumn *col;
569 
570 	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
571 
572 	/*
573 	 * loop while the sargs don't match
574 	 */
575 	do {
576 		ipg->len = 0;
577 		/*
578 		 * if no more rows on this leaf, try to find a new leaf
579 		 */
580 		if (!mdb_index_find_next_on_page(mdb, ipg)) {
581 			if (!chain->clean_up_mode) {
582 				if (!(ipg = mdb_index_unwind(mdb, idx, chain)))
583 					chain->clean_up_mode = 1;
584 			}
585 			if (chain->clean_up_mode) {
586 				//fprintf(stdout,"in cleanup mode\n");
587 
588 				if (!chain->last_leaf_found) return 0;
589 				mdb_read_pg(mdb, chain->last_leaf_found);
590 				chain->last_leaf_found = mdb_pg_get_int24(mdb, 0x0c);
591 				//printf("next leaf %lu\n", chain->last_leaf_found);
592 				mdb_read_pg(mdb, chain->last_leaf_found);
593 				/* reuse the chain for cleanup mode */
594 				chain->cur_depth = 1;
595 				ipg = &chain->pages[0];
596 				mdb_index_page_init(ipg);
597 				ipg->pg = chain->last_leaf_found;
598 				//printf("next on page %d\n",
599 				if (!mdb_index_find_next_on_page(mdb, ipg))
600 					return 0;
601 			}
602 		}
603 		*row = mdb->pg_buf[ipg->offset + ipg->len - 1];
604 		*pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4);
605 		//printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len);
606 		col=g_ptr_array_index(idx->table->columns,idx->key_col_num[0]-1);
607 		idx_sz = mdb_col_fixed_size(col);
608 		/* handle compressed indexes, single key indexes only? */
609 		if (idx->num_keys==1 && idx_sz>0 && ipg->len - 4 < idx_sz) {
610 			//printf("short index found\n");
611 			//buffer_dump(ipg->cache_value, 0, idx_sz);
612 			memcpy(&ipg->cache_value[idx_sz - (ipg->len - 4)], &mdb->pg_buf[ipg->offset], ipg->len);
613 			//buffer_dump(ipg->cache_value, 0, idx_sz);
614 		} else {
615 			idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
616 			memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz);
617 		}
618 
619 		//idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
620 		passed = mdb_index_test_sargs(mdb, idx, ipg->cache_value, idx_sz);
621 
622 		ipg->offset += ipg->len;
623 	} while (!passed);
624 
625 	//fprintf(stdout,"len = %d pos %d\n", ipg->len, ipg->mask_pos);
626 	//buffer_dump(mdb->pg_buf, ipg->offset, ipg->offset+ipg->len-1);
627 
628 	return ipg->len;
629 }
630 /*
631  * XXX - FIX ME
632  * This function is grossly inefficient.  It scans the entire index building
633  * an IndexChain to a specific row.  We should be checking the index pages
634  * for matches against the indexed fields to find the proper leaf page, but
635  * getting it working first and then make it fast!
636  */
637 int
mdb_index_find_row(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain,guint32 pg,guint16 row)638 mdb_index_find_row(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 pg, guint16 row)
639 {
640 	MdbIndexPage *ipg;
641 	int passed = 0;
642 	guint32 datapg;
643 	guint16 datarow;
644 
645 	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
646 
647 	do {
648 		ipg->len = 0;
649 		/*
650 		 * if no more rows on this leaf, try to find a new leaf
651 		 */
652 		if (!mdb_index_find_next_on_page(mdb, ipg)) {
653 			/* back to top? We're done */
654 			if (chain->cur_depth==1)
655 				return 0;
656 
657 			/*
658 			 * unwind the stack until we find something or reach
659 			 * the top.
660 			 */
661 			while (chain->cur_depth>1) {
662 				chain->cur_depth--;
663 				if (!(ipg = mdb_find_next_leaf(mdb, idx, chain)))
664 					return 0;
665 				mdb_index_find_next_on_page(mdb, ipg);
666 			}
667 			if (chain->cur_depth==1)
668 				return 0;
669 		}
670 		/* test row and pg */
671 		datarow = mdb->pg_buf[ipg->offset + ipg->len - 1];
672 		datapg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4);
673 
674 		if (datapg == pg && datarow == row) {
675 			passed = 1;
676 		}
677 		ipg->offset += ipg->len;
678 	} while (!passed);
679 
680 	/* index chain from root to leaf should now be in "chain" */
681 	return 1;
682 }
683 
mdb_index_walk(MdbTableDef * table,MdbIndex * idx)684 void mdb_index_walk(MdbTableDef *table, MdbIndex *idx)
685 {
686 MdbHandle *mdb = table->entry->mdb;
687 int cur_pos = 0;
688 unsigned char marker;
689 MdbColumn *col;
690 unsigned int i;
691 
692 	if (idx->num_keys!=1) return;
693 
694 	mdb_read_pg(mdb, idx->first_pg);
695 	cur_pos = 0xf8;
696 
697 	for (i=0;i<idx->num_keys;i++) {
698 		marker = mdb->pg_buf[cur_pos++];
699 		col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
700 		//printf("column %d coltype %d col_size %d (%d)\n",i,col->col_type, mdb_col_fixed_size(col), col->col_size);
701 	}
702 }
703 void
mdb_index_dump(MdbTableDef * table,MdbIndex * idx)704 mdb_index_dump(MdbTableDef *table, MdbIndex *idx)
705 {
706 	unsigned int i;
707 	MdbColumn *col;
708 
709 	fprintf(stdout,"index number     %d\n", idx->index_num);
710 	fprintf(stdout,"index name       %s\n", idx->name);
711 	fprintf(stdout,"index first page %d\n", idx->first_pg);
712 	fprintf(stdout,"index rows       %d\n", idx->num_rows);
713 	if (idx->index_type==1) fprintf(stdout,"index is a primary key\n");
714 	for (i=0;i<idx->num_keys;i++) {
715 		col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
716 		fprintf(stdout,"Column %s(%d) Sorted %s Unique: %s\n",
717 			col->name,
718 			idx->key_col_num[i],
719 			idx->key_col_order[i]==MDB_ASC ? "ascending" : "descending",
720 			idx->flags & MDB_IDX_UNIQUE ? "Yes" : "No"
721 			);
722 	}
723 	mdb_index_walk(table, idx);
724 }
725 /*
726  * compute_cost tries to assign a cost to a given index using the sargs
727  * available in this query.
728  *
729  * Indexes with no matching sargs are assigned 0
730  * Unique indexes are preferred over non-uniques
731  * Operator preference is equal, like, isnull, others
732  */
mdb_index_compute_cost(MdbTableDef * table,MdbIndex * idx)733 int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
734 {
735 	unsigned int i;
736 	MdbColumn *col;
737 	MdbSarg *sarg = NULL;
738 	int not_all_equal = 0;
739 
740 	if (!idx->num_keys) return 0;
741 	if (idx->num_keys > 1) {
742 		for (i=0;i<idx->num_keys;i++) {
743 			col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
744 			if (col->sargs) sarg = g_ptr_array_index (col->sargs, 0);
745 			if (!sarg || sarg->op != MDB_EQUAL) not_all_equal++;
746 		}
747 	}
748 
749 	col=g_ptr_array_index(table->columns,idx->key_col_num[0]-1);
750 	/*
751 	 * if this is the first key column and there are no sargs,
752 	 * then this index is useless.
753 	 */
754 	if (!col->num_sargs) return 0;
755 
756 	sarg = g_ptr_array_index (col->sargs, 0);
757 
758 	/*
759 	 * a like with a wild card first is useless as a sarg */
760 	if (sarg->op == MDB_LIKE && sarg->value.s[0]=='%')
761 		return 0;
762 
763 	/*
764 	 * this needs a lot of tweaking.
765 	 */
766 	if (idx->flags & MDB_IDX_UNIQUE) {
767 		if (idx->num_keys == 1) {
768 			//printf("op is %d\n", sarg->op);
769 			switch (sarg->op) {
770 				case MDB_EQUAL:
771 					return 1; break;
772 				case MDB_LIKE:
773 					return 4; break;
774 				case MDB_ISNULL:
775 					return 12; break;
776 				default:
777 					return 8; break;
778 			}
779 		} else {
780 			switch (sarg->op) {
781 				case MDB_EQUAL:
782 					if (not_all_equal) return 2;
783 					else return 1;
784 					break;
785 				case MDB_LIKE:
786 					return 6; break;
787 				case MDB_ISNULL:
788 					return 12; break;
789 				default:
790 					return 9; break;
791 			}
792 		}
793 	} else {
794 		if (idx->num_keys == 1) {
795 			switch (sarg->op) {
796 				case MDB_EQUAL:
797 					return 2; break;
798 				case MDB_LIKE:
799 					return 5; break;
800 				case MDB_ISNULL:
801 					return 12; break;
802 				default:
803 					return 10; break;
804 			}
805 		} else {
806 			switch (sarg->op) {
807 				case MDB_EQUAL:
808 					if (not_all_equal) return 3;
809 					else return 2;
810 					break;
811 				case MDB_LIKE:
812 					return 7; break;
813 				case MDB_ISNULL:
814 					return 12; break;
815 				default:
816 					return 11; break;
817 			}
818 		}
819 	}
820 	return 0;
821 }
822 /*
823  * choose_index runs mdb_index_compute_cost for each available index and picks
824  * the best.
825  *
826  * Returns strategy to use (table scan, or index scan)
827  */
828 MdbStrategy
mdb_choose_index(MdbTableDef * table,int * choice)829 mdb_choose_index(MdbTableDef *table, int *choice)
830 {
831 	unsigned int i;
832 	MdbIndex *idx;
833 	int cost = 0;
834 	int least = 99;
835 
836 	*choice = -1;
837 	for (i=0;i<table->num_idxs;i++) {
838 		idx = g_ptr_array_index (table->indices, i);
839 		cost = mdb_index_compute_cost(table, idx);
840 		//printf("cost for %s is %d\n", idx->name, cost);
841 		if (cost && cost < least) {
842 			least = cost;
843 			*choice = i;
844 		}
845 	}
846 	/* and the winner is: *choice */
847 	if (least==99) return MDB_TABLE_SCAN;
848 	return MDB_INDEX_SCAN;
849 }
850 void
mdb_index_scan_init(MdbHandle * mdb,MdbTableDef * table)851 mdb_index_scan_init(MdbHandle *mdb, MdbTableDef *table)
852 {
853 	int i;
854 
855 	if (mdb_get_option(MDB_USE_INDEX) && mdb_choose_index(table, &i) == MDB_INDEX_SCAN) {
856 		table->strategy = MDB_INDEX_SCAN;
857 		table->scan_idx = g_ptr_array_index (table->indices, i);
858 		table->chain = g_malloc0(sizeof(MdbIndexChain));
859 		table->mdbidx = mdb_clone_handle(mdb);
860 		mdb_read_pg(table->mdbidx, table->scan_idx->first_pg);
861 		//printf("best index is %s\n",table->scan_idx->name);
862 	}
863 	//printf("TABLE SCAN? %d\n", table->strategy);
864 }
865 void
mdb_index_scan_free(MdbTableDef * table)866 mdb_index_scan_free(MdbTableDef *table)
867 {
868 	if (table->chain) {
869 		g_free(table->chain);
870 		table->chain = NULL;
871 	}
872 	if (table->mdbidx) {
873 		mdb_close(table->mdbidx);
874 		table->mdbidx = NULL;
875 	}
876 }
877 
mdb_free_indices(GPtrArray * indices)878 void mdb_free_indices(GPtrArray *indices)
879 {
880 	unsigned int i;
881 
882 	if (!indices) return;
883 	for (i=0; i<indices->len; i++)
884 		g_free (g_ptr_array_index(indices, i));
885 	g_ptr_array_free(indices, TRUE);
886 }
887