1 /* MDB Tools - A library for reading MS Access database file
2  * Copyright (C) 2000-2004 Brian Bruns
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 #include "mdbtools.h"
20 
21 #ifdef DMALLOC
22 #include "dmalloc.h"
23 #endif
24 
25 MdbIndexPage *mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain);
26 MdbIndexPage *mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg);
27 
28 char idx_to_text[] = {
29 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0-7     0x00-0x07 */
30 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8-15    0x09-0x0f */
31 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16-23   0x10-0x17 */
32 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24-31   0x19-0x1f */
33 ' ',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 32-39   0x20-0x27 */
34 0x00, 0x00, 0x00, 0x00, 0x00, ' ',  ' ',  0x00, /* 40-47   0x29-0x2f */
35 'V',  'W',  'X',  'Y',  'Z',  '[',  '\\', ']',  /* 48-55   0x30-0x37 */
36 '^',  '_',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56-63   0x39-0x3f */
37 0x00, '`',  'a',  'b',  'd',  'f',  'g',  'h',  /* 64-71   0x40-0x47 */
38 'i',  'j',  'k',  'l',  'm',  'o',  'p',  'r',  /* 72-79   0x49-0x4f  H */
39 's',  't',  'u',  'v',  'w',  'x',  'z',  '{',  /* 80-87   0x50-0x57  P */
40 '|',  '}',  '~',  '5',  '6',  '7',  '8',  '9',  /* 88-95   0x59-0x5f */
41 0x00, '`',  'a',  'b',  'd',  'f',  'g',  'h',  /* 96-103  0x60-0x67 */
42 'i',  'j',  'k',  'l',  'm',  'o',  'p',  'r',  /* 014-111 0x69-0x6f  h */
43 's',  't',  'u',  'v',  'w',  'x',  'z',  '{',  /* 112-119 0x70-0x77  p */
44 '|',  '}',  '~',  0x00, 0x00, 0x00, 0x00, 0x00, /* 120-127 0x78-0x7f */
45 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 0x80-0x87 */
46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
47 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
48 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
50 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */
51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
52 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */
53 0x00, 0x00, 0x00, 0x00, 0x00, '`',  0x00, 0x00, /* 0xc0-0xc7 */
54 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
56 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
57 0x00, '`',  0x00, '`',  '`',  '`',  0x00, 0x00, /* 0xe0-0xe7 */
58 'f',  'f',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
59 0x00, 0x00, 0x00, 'r',  0x00, 0x00, 'r',  0x00, /* 0xf0-0xf7 */
60 0x81, 0x00, 0x00, 0x00, 'x',  0x00, 0x00, 0x00, /* 0xf8-0xff */
61 };
62 
63 /* JET Red (v4) Index definition byte layouts
64  *
65  * Based on:
66  *
67  * http://jabakobob.net/mdb/table-page.html
68  * https://github.com/jahlborn/jackcess
69  *
70  * plus inspection of JET (Red) 4 databases.  (JET 3 format has fewer
71  * fields -- some of the ones below omitted, and others narrower.)
72  *
73  * See also JET Blue (Extensible Storage Engine) format information:
74  *
75  * https://github.com/libyal/libesedb/blob/master/documentation/Extensible%20Storage%20Engine%20%28ESE%29%20Database%20File%20%28EDB%29%20format.asciidoc
76  *
77  * which is a later Microsoft embedded database format with the same
78  * early base format.
79  *
80  * ----------------------------------------------------------------------
81  * Index Column Definitions:
82  * - for each "non foreign key" index (ie pidx->index_type!=2), a list
83  *   of columns indexed
84  *
85  * Repeated table->num_real_idxs times:
86  *
87  * Offset	Bytes 	Meaning
88  * 0x0000	4	UNKNOWN; seems to be type marker, usually 1923 or 0
89  *
90  * 0x0004       2       Column 1 ID
91  * 0x0006       1       Column 1 Flags
92  * 0x0007       2       Column 2 ID
93  * 0x0009       1       Column 2 Flags
94  * 0x000A       2       Column 3 ID
95  * 0x000C       1       Column 3 Flags
96  * 0x000D       2       Column 4 ID
97  * 0x000F       1       Column 4 Flags
98  * 0x0010       2       Column 5 ID
99  * 0x0012       1       Column 5 Flags
100  * 0x0013       2       Column 6 ID
101  * 0x0015       1       Column 6 Flags
102  * 0x0016       2       Column 7 ID
103  * 0x0018       1       Column 7 Flags
104  * 0x0019       2       Column 8 ID
105  * 0x001B       1       Column 8 Flags
106  * 0x001C       2       Column 9 ID
107  * 0x001E       1       Column 9 Flags
108  * 0x001F       2       Column 10 ID
109  * 0x0021       1       Column 10 Flags
110  *
111  * 0x0022	1	Usage Map row
112  * 0x0023	3	Usage Map page (24-bit)
113  * 0x0026	4	First index page
114  * 0x002A	4	UNKNOWN
115  * 0x002E	2	Index Flags
116  * 0x0030	4	UNKNOWN; seems to always be 0
117  * 0x0034
118  *
119  * Column ID of 0xFFFF (-1) means "not used" or "end of used columns".
120  * Column Flags:
121  * - 0x01 = Ascending
122  *
123  * Index Flags:
124  * - 0x0001 = Unique index
125  * - 0x0002 = Ignore NULLs
126  * - 0x0008 = Required Index
127  *
128  * ----------------------------------------------------------------------
129  * Index Definitions
130  * - for each index (normal, primary key, foreign key), details on the
131  *   index.
132  *
133  * - this appears to be the union of information required for normal/
134  *   primary key indexes, and the information required for foreign key
135  *   indexes.
136  *
137  * Repeated table->num_idxs times:
138  *
139  * Offset	Bytes 	Meaning
140  * 0x0000	4	UNKNOWN; apparently a type marker, usually 1625 or 0
141  * 0x0004	4	Logical Index Number
142  * 0x0008	4	Index Column Definition Entry
143  * 0x000C	1	FK Index Type
144  * 0x000D	4	FK Index Number
145  * 0x0011	4	FK Index Table Page Number
146  * 0x0015	1	Flags: Update Action
147  * 0x0016	1	Flags: Delete Action
148  * 0x0017	1	Index Type
149  * 0x0018	4	UNKNNOWN; seems to always be 0
150  * 0x001B
151  *
152  * Where Index Type is:
153  * 0x01 = normal
154  * 0x01 = primary key
155  * 0x02 = foreign key index reference
156  */
157 
158 /* Debugging helper to dump out raw hex values of index definition */
159 /*
160 static void hexdump(unsigned char *tmpbuf, int size) {
161 	int i;
162 	for (i = 0; i < size; ++i) {
163 		fprintf(stderr, "%02x ", tmpbuf[i]);
164         }
165 	fprintf(stderr, "\n");
166 }
167 */
168 
169 
170 GPtrArray *
mdb_read_indices(MdbTableDef * table)171 mdb_read_indices(MdbTableDef *table)
172 {
173 	MdbCatalogEntry *entry = table->entry;
174 	MdbHandle *mdb = entry->mdb;
175 	MdbFormatConstants *fmt = mdb->fmt;
176 	MdbIndex *pidx;
177 	unsigned int i, j, k;
178 	int key_num, col_num, cleaned_col_num;
179 	int cur_pos, name_sz, idx2_sz, type_offset;
180 	int index_start_pg = mdb->cur_pg;
181 	char *tmpbuf;
182 
183 	table->indices = g_ptr_array_new();
184 
185 	if (IS_JET3(mdb)) {
186 		cur_pos = table->index_start + 39 * table->num_real_idxs;
187 		idx2_sz = 20;
188 		type_offset = 19;
189 	} else {
190 		cur_pos = table->index_start + 52 * table->num_real_idxs;
191 		idx2_sz = 28;
192 		type_offset = 23;
193 	}
194 
195 	/* Read in the definitions of table indexes, into table->indices */
196 
197 	/* num_real_idxs should be the number of indexes other than type 2.
198 	 * It's not always the case. Happens on Northwind Orders table.
199 	 */
200 	//fprintf(stderr, "num_idxs:%d num_real_idxs:%d\n", table->num_idxs, table->num_real_idxs);
201 
202 	table->num_real_idxs = 0;
203 	tmpbuf = g_malloc(idx2_sz);
204 	for (i=0;i<table->num_idxs;i++) {
205 		read_pg_if_n(mdb, tmpbuf, &cur_pos, idx2_sz);
206 		//fprintf(stderr, "Index defn: ");
207 		//hexdump((unsigned char *)tmpbuf, idx2_sz);
208 		pidx = (MdbIndex *) g_malloc0(sizeof(MdbIndex));
209 		pidx->table = table;
210 		pidx->index_num = mdb_get_int16(tmpbuf, 4);
211 		pidx->index_type = tmpbuf[type_offset];
212 		g_ptr_array_add(table->indices, pidx);
213 		/*
214 		{
215 			gint32 idx_marker = mdb_get_int32(tmpbuf, 0);
216 		        gint32 index_col_def_num = mdb_get_int16(tmpbuf, 8);
217 			gint8 rel_idx_type = tmpbuf[0x0c];
218 			gint32 rel_idx_number = mdb_get_int32(tmpbuf, 0x0d);
219 			gint32 rel_idx_page = mdb_get_int32(tmpbuf, 0x11);
220 			gint8 update_action_flags = tmpbuf[0x15];
221 			gint8 delete_action_flags = tmpbuf[0x16];
222 			fprintf(stderr, "idx #%d: num2:%d num3:%d type:%d\n", i, pidx->index_num, index_col_def_num, pidx->index_type);
223 			fprintf(stderr, "idx #%d: %d %d %d %d %d/%d\n", i, idx_marker, rel_idx_type, rel_idx_number, rel_idx_page, update_action_flags, delete_action_flags);
224 		}*/
225 		if (pidx->index_type!=2)
226 			table->num_real_idxs++;
227 	}
228 	//fprintf(stderr, "num_idxs:%d num_real_idxs:%d\n", table->num_idxs, table->num_real_idxs);
229 	g_free(tmpbuf);
230 
231 	/* Pick up the names of each index */
232 	for (i=0;i<table->num_idxs;i++) {
233 		pidx = g_ptr_array_index (table->indices, i);
234 		if (IS_JET3(mdb)) {
235 			name_sz=read_pg_if_8(mdb, &cur_pos);
236 		} else {
237 			name_sz=read_pg_if_16(mdb, &cur_pos);
238 		}
239 		tmpbuf = g_malloc(name_sz);
240 		read_pg_if_n(mdb, (char *)tmpbuf, &cur_pos, name_sz);
241 		mdb_unicode2ascii(mdb, (char *)tmpbuf, name_sz, pidx->name, MDB_MAX_OBJ_NAME);
242 		g_free(tmpbuf);
243 		//fprintf(stderr, "index %d type %d name %s\n", pidx->index_num, pidx->index_type, pidx->name);
244 	}
245 
246 	/* Pick up the column definitions for normal/primary key indexes */
247 	/* NOTE: Match should possibly be by index_col_def_num, rather
248          * than index_num; but in files encountered both seem to be the
249          * same (so left with index_num until a counter example is found).
250          */
251 	mdb_read_alt_pg(mdb, entry->table_pg);
252 	mdb_read_pg(mdb, index_start_pg);
253 	cur_pos = table->index_start;
254 	for (i=0;i<table->num_real_idxs;i++) {
255 		/*	Debugging print out, commented out
256 		{
257 			gchar *tmpbuf = (gchar *) g_malloc(0x34);
258 			int now_pos = cur_pos;
259 			read_pg_if_n(mdb, tmpbuf, &now_pos, 0x34);
260 			fprintf(stderr, "Index defn: ");
261 			hexdump((unsigned char *)tmpbuf, 0x34);
262 			g_free(tmpbuf);
263 		}*/
264 
265 		if (!IS_JET3(mdb)) cur_pos += 4;
266 		/* look for index number i */
267 		for (j=0; j<table->num_idxs; ++j) {
268 			pidx = g_ptr_array_index (table->indices, j);
269 			if (pidx->index_type!=2 && pidx->index_num==i)
270 				break;
271 		}
272 		if (j==table->num_idxs) {
273 			fprintf(stderr, "ERROR: can't find index #%d.\n", i);
274 			continue;
275 		}
276 		//fprintf(stderr, "index %d #%d (%s) index_type:%d\n", i, pidx->index_num, pidx->name, pidx->index_type);
277 
278 		pidx->num_rows = mdb_get_int32((char *)mdb->alt_pg_buf,
279 				fmt->tab_cols_start_offset +
280 				(pidx->index_num*fmt->tab_ridx_entry_size));
281 		/*
282 		fprintf(stderr, "ridx block1 i:%d data1:0x%08x data2:0x%08x\n",
283 			i,
284 			(unsigned int)mdb_get_int32(mdb->pg_buf,
285 				fmt->tab_cols_start_offset + pidx->index_num * fmt->tab_ridx_entry_size),
286 			(unsigned int)mdb_get_int32(mdb->pg_buf,
287 				fmt->tab_cols_start_offset + pidx->index_num * fmt->tab_ridx_entry_size +4));
288 		fprintf(stderr, "pidx->num_rows:%d\n", pidx->num_rows);*/
289 
290 		/* Read columns in each index */
291 		key_num=0;
292 		for (j=0;j<MDB_MAX_IDX_COLS;j++) {
293 			col_num=read_pg_if_16(mdb,&cur_pos);
294 			if (col_num == 0xFFFF) {
295 				cur_pos++;
296 				continue;
297 			}
298 			/* here we have the internal column number that does not
299 			 * always match the table columns because of deletions */
300 			cleaned_col_num = -1;
301 			for (k=0; k<table->num_cols; k++) {
302 				MdbColumn *col = g_ptr_array_index(table->columns,k);
303 				if (col->col_num == col_num) {
304 					cleaned_col_num = k;
305 					break;
306 				}
307 			}
308 			if (cleaned_col_num==-1) {
309 				fprintf(stderr, "CRITICAL: can't find column with internal id %d in index %s\n",
310 					col_num, pidx->name);
311 				cur_pos++;
312 				continue;
313 			}
314 			/* set column number to a 1 based column number and store */
315 			pidx->key_col_num[key_num] = cleaned_col_num + 1;
316 			pidx->key_col_order[key_num] =
317 				(read_pg_if_8(mdb, &cur_pos)) ? MDB_ASC : MDB_DESC;
318 			//fprintf(stderr, "component %d using column #%d (internally %d)\n", j, cleaned_col_num,  col_num);
319 			key_num++;
320 		}
321 		pidx->num_keys = key_num;
322 
323 		if (0)  // DEBUGGING ONLY
324 		{
325 			gint32 usage_map = read_pg_if_32(mdb, &cur_pos);
326 			fprintf(stderr, "pidx->unknown_pre_first_pg:0x%08x\n", usage_map);
327 		} else {
328 			cur_pos += 4;    // Skip Usage map information
329 		}
330 		pidx->first_pg = read_pg_if_32(mdb, &cur_pos);
331 
332 		if (!IS_JET3(mdb)) cur_pos += 4;
333 
334 		pidx->flags = read_pg_if_8(mdb, &cur_pos);
335 		//fprintf(stderr, "pidx->first_pg:%d pidx->flags:0x%02x\n",	pidx->first_pg, pidx->flags);
336 		if (!IS_JET3(mdb)) cur_pos += 5;
337 	}
338 	return NULL;
339 }
340 void
mdb_index_hash_text(char * text,char * hash)341 mdb_index_hash_text(char *text, char *hash)
342 {
343 	unsigned int k;
344 
345 	for (k=0;k<strlen(text);k++) {
346 		int c = ((unsigned char *)(text))[k];
347 		hash[k] = idx_to_text[c];
348 		if (!(hash[k])) fprintf(stderr,
349 				"No translation available for %02x %d\n", c, c);
350 	}
351 	hash[strlen(text)]='\0';
352 }
353 /*
354  * reverse the order of the column for hashing
355  */
356 void
mdb_index_swap_n(unsigned char * src,int sz,unsigned char * dest)357 mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest)
358 {
359 	int i, j = 0;
360 
361 	for (i = sz-1; i >= 0; i--) {
362 		dest[j++] = src[i];
363 	}
364 }
365 void
mdb_index_cache_sarg(MdbColumn * col,MdbSarg * sarg,MdbSarg * idx_sarg)366 mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg)
367 {
368 	//guint32 cache_int;
369 	unsigned char *c;
370 
371 	switch (col->col_type) {
372 		case MDB_TEXT:
373 		mdb_index_hash_text(sarg->value.s, idx_sarg->value.s);
374 		break;
375 
376 		case MDB_LONGINT:
377 		idx_sarg->value.i = GUINT32_SWAP_LE_BE(sarg->value.i);
378 		//cache_int = sarg->value.i * -1;
379 		c = (unsigned char *) &(idx_sarg->value.i);
380 		c[0] |= 0x80;
381 		//printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]);
382 		break;
383 
384 		case MDB_INT:
385 		break;
386 
387 		default:
388 		break;
389 	}
390 }
391 #if 0
392 int
393 mdb_index_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSarg *sarg, int offset, int len)
394 {
395 char tmpbuf[256];
396 int lastchar;
397 
398 	switch (col->col_type) {
399 		case MDB_BYTE:
400 			return mdb_test_int(sarg, mdb_pg_get_byte(mdb, offset));
401 			break;
402 		case MDB_INT:
403 			return mdb_test_int(sarg, mdb_pg_get_int16(mdb, offset));
404 			break;
405 		case MDB_LONGINT:
406 			return mdb_test_int(sarg, mdb_pg_get_int32(mdb, offset));
407 			break;
408 		case MDB_TEXT:
409 			strncpy(tmpbuf, &mdb->pg_buf[offset],255);
410 			lastchar = len > 255 ? 255 : len;
411 			tmpbuf[lastchar]='\0';
412 			return mdb_test_string(sarg, tmpbuf);
413 		default:
414 			fprintf(stderr, "Calling mdb_test_sarg on unknown type.  Add code to mdb_test_sarg() for type %d\n",col->col_type);
415 			break;
416 	}
417 	return 1;
418 }
419 #endif
420 int
mdb_index_test_sargs(MdbHandle * mdb,MdbIndex * idx,char * buf,int len)421 mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, char *buf, int len)
422 {
423 	unsigned int i, j;
424 	MdbColumn *col;
425 	MdbTableDef *table = idx->table;
426 	MdbSarg *idx_sarg;
427 	MdbSarg *sarg;
428 	MdbField field;
429 	MdbSargNode node;
430 	//int c_offset = 0,
431 	int c_len;
432 
433 	//fprintf(stderr,"mdb_index_test_sargs called on ");
434 	//for (i=0;i<len;i++)
435 		//fprintf(stderr,"%02x ",buf[i]); //mdb->pg_buf[offset+i]);
436 	//fprintf(stderr,"\n");
437 	for (i=0;i<idx->num_keys;i++) {
438 		//c_offset++; /* the per column null indicator/flags */
439 		col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
440 		/*
441 		 * This will go away eventually
442 		 */
443 		if (col->col_type==MDB_TEXT) {
444 			//c_len = strlen(&mdb->pg_buf[offset + c_offset]);
445 			c_len = strlen(buf);
446 		} else {
447 			c_len = col->col_size;
448 			//fprintf(stderr,"Only text types currently supported.  How did we get here?\n");
449 		}
450 		/*
451 		 * If we have no cached index values for this column,
452 		 * create them.
453 		 */
454 		if (col->num_sargs && !col->idx_sarg_cache) {
455 			col->idx_sarg_cache = g_ptr_array_new();
456 			for (j=0;j<col->num_sargs;j++) {
457 				sarg = g_ptr_array_index (col->sargs, j);
458 				idx_sarg = g_memdup(sarg,sizeof(MdbSarg));
459 				//printf("calling mdb_index_cache_sarg\n");
460 				mdb_index_cache_sarg(col, sarg, idx_sarg);
461 				g_ptr_array_add(col->idx_sarg_cache, idx_sarg);
462 			}
463 		}
464 
465 		for (j=0;j<col->num_sargs;j++) {
466 			sarg = g_ptr_array_index (col->idx_sarg_cache, j);
467 			/* XXX - kludge */
468 			node.op = sarg->op;
469 			node.value = sarg->value;
470 			//field.value = &mdb->pg_buf[offset + c_offset];
471 			field.value = buf;
472 		       	field.siz = c_len;
473 		       	field.is_null = FALSE;
474 			if (!mdb_test_sarg(mdb, col, &node, &field)) {
475 				/* sarg didn't match, no sense going on */
476 				return 0;
477 			}
478 		}
479 	}
480 	return 1;
481 }
482 /*
483  * pack the pages bitmap
484  */
485 int
mdb_index_pack_bitmap(MdbHandle * mdb,MdbIndexPage * ipg)486 mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
487 {
488 	int mask_bit = 0;
489 	int mask_pos = 0x16;
490 	int mask_byte = 0;
491 	int elem = 0;
492 	int len, start, i;
493 
494 	start = ipg->idx_starts[elem++];
495 
496 	while (start) {
497 		//fprintf(stdout, "elem %d is %d\n", elem, ipg->idx_starts[elem]);
498 		len = ipg->idx_starts[elem] - start;
499 		//fprintf(stdout, "len is %d\n", len);
500 		for (i=0; i < len; i++) {
501 			mask_bit++;
502 			if (mask_bit==8) {
503 				mask_bit=0;
504 				mdb->pg_buf[mask_pos++] = mask_byte;
505 				mask_byte = 0;
506 			}
507 			/* upon reaching the len, set the bit */
508 		}
509 		mask_byte = (1 << mask_bit) | mask_byte;
510 		//fprintf(stdout, "mask byte is %02x at %d\n", mask_byte, mask_pos);
511 		start = ipg->idx_starts[elem++];
512 	}
513 	/* flush the last byte if any */
514 	mdb->pg_buf[mask_pos++] = mask_byte;
515 	/* remember to zero the rest of the bitmap */
516 	for (i = mask_pos; i < 0xf8; i++) {
517 		mdb->pg_buf[mask_pos++] = 0;
518 	}
519 	return 0;
520 }
521 /*
522  * unpack the pages bitmap
523  */
524 int
mdb_index_unpack_bitmap(MdbHandle * mdb,MdbIndexPage * ipg)525 mdb_index_unpack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
526 {
527 	int mask_bit = 0;
528 	int mask_pos = 0x16;
529 	int mask_byte;
530 	int start = 0xf8;
531 	int elem = 0;
532 	int len = 0;
533 
534 	ipg->idx_starts[elem++]=start;
535 
536 	//fprintf(stdout, "Unpacking index page %lu\n", ipg->pg);
537 	do {
538 		len = 0;
539 		do {
540 			mask_bit++;
541 			if (mask_bit==8) {
542 				mask_bit=0;
543 				mask_pos++;
544 			}
545 			mask_byte = mdb->pg_buf[mask_pos];
546 			len++;
547 		} while (mask_pos <= 0xf8 && !((1 << mask_bit) & mask_byte));
548 		//fprintf(stdout, "%d %d %d %d\n", mask_pos, mask_bit, mask_byte, len);
549 
550 		start += len;
551 		if (mask_pos < 0xf8) ipg->idx_starts[elem++]=start;
552 
553 	} while (mask_pos < 0xf8);
554 
555 	/* if we zero the next element, so we don't pick up the last pages starts*/
556 	ipg->idx_starts[elem]=0;
557 
558 	return elem;
559 }
560 /*
561  * find the next entry on a page (either index or leaf). Uses state information
562  * stored in the MdbIndexPage across calls.
563  */
564 int
mdb_index_find_next_on_page(MdbHandle * mdb,MdbIndexPage * ipg)565 mdb_index_find_next_on_page(MdbHandle *mdb, MdbIndexPage *ipg)
566 {
567 	if (!ipg->pg) return 0;
568 
569 	/* if this page has not been unpacked to it */
570 	if (!ipg->idx_starts[0]){
571 		//fprintf(stdout, "Unpacking page %d\n", ipg->pg);
572 		mdb_index_unpack_bitmap(mdb, ipg);
573 	}
574 
575 
576 	if (ipg->idx_starts[ipg->start_pos + 1]==0) return 0;
577 	ipg->len = ipg->idx_starts[ipg->start_pos+1] - ipg->idx_starts[ipg->start_pos];
578 	ipg->start_pos++;
579 	//fprintf(stdout, "Start pos %d\n", ipg->start_pos);
580 
581 	return ipg->len;
582 }
mdb_index_page_reset(MdbIndexPage * ipg)583 void mdb_index_page_reset(MdbIndexPage *ipg)
584 {
585 	ipg->offset = 0xf8; /* start byte of the index entries */
586 	ipg->start_pos=0;
587 	ipg->len = 0;
588 	ipg->idx_starts[0]=0;
589 }
mdb_index_page_init(MdbIndexPage * ipg)590 void mdb_index_page_init(MdbIndexPage *ipg)
591 {
592 	memset(ipg, 0, sizeof(MdbIndexPage));
593 	mdb_index_page_reset(ipg);
594 }
595 /*
596  * find the next leaf page if any given a chain. Assumes any exhausted leaf
597  * pages at the end of the chain have been peeled off before the call.
598  */
599 MdbIndexPage *
mdb_find_next_leaf(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)600 mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
601 {
602 	MdbIndexPage *ipg, *newipg;
603 	guint32 pg;
604 	guint passed = 0;
605 
606 	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
607 
608 	/*
609 	 * If we are at the first page deep and it's not an index page then
610 	 * we are simply done. (there is no page to find
611 	 */
612 
613 	if (mdb->pg_buf[0]==MDB_PAGE_LEAF) {
614 		/* Indexes can have leaves at the end that don't appear
615 		 * in the upper tree, stash the last index found so
616 		 * we can follow it at the end.  */
617 		chain->last_leaf_found = ipg->pg;
618 		return ipg;
619 	}
620 
621 	/*
622 	 * apply sargs here, currently we don't
623 	 */
624 	do {
625 		ipg->len = 0;
626 		//printf("finding next on pg %lu\n", ipg->pg);
627 		if (!mdb_index_find_next_on_page(mdb, ipg)) {
628 			//printf("find_next_on_page returned 0\n");
629 			return 0;
630 		}
631 		pg = mdb_get_int32_msb((char *)mdb->pg_buf, ipg->offset + ipg->len - 3) >> 8;
632 		//printf("Looking at pg %lu at %lu %d\n", pg, ipg->offset, ipg->len);
633 		ipg->offset += ipg->len;
634 
635 		/*
636 		 * add to the chain and call this function
637 		 * recursively.
638 		 */
639 		newipg = mdb_chain_add_page(mdb, chain, pg);
640 		newipg = mdb_find_next_leaf(mdb, idx, chain);
641 		//printf("returning pg %lu\n",newipg->pg);
642 		return newipg;
643 	} while (!passed);
644 	/* no more pages */
645 	return NULL;
646 
647 }
648 MdbIndexPage *
mdb_chain_add_page(MdbHandle * mdb,MdbIndexChain * chain,guint32 pg)649 mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg)
650 {
651 	MdbIndexPage *ipg;
652 
653 	chain->cur_depth++;
654 	if (chain->cur_depth > MDB_MAX_INDEX_DEPTH) {
655 		fprintf(stderr,"Error! maximum index depth of %d exceeded.  This is probably due to a programming bug, If you are confident that your indexes really are this deep, adjust MDB_MAX_INDEX_DEPTH in mdbtools.h and recompile.\n", MDB_MAX_INDEX_DEPTH);
656 		exit(1);
657 	}
658 	ipg = &(chain->pages[chain->cur_depth - 1]);
659 	mdb_index_page_init(ipg);
660 	ipg->pg = pg;
661 
662 	return ipg;
663 }
664 /*
665  * returns the bottom page of the IndexChain, if IndexChain is empty it
666  * initializes it by reading idx->first_pg (the root page)
667  */
668 MdbIndexPage *
mdb_index_read_bottom_pg(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)669 mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
670 {
671 	MdbIndexPage *ipg;
672 
673 	/*
674 	 * if it's new use the root index page (idx->first_pg)
675 	 */
676 	if (!chain->cur_depth) {
677 		ipg = &(chain->pages[0]);
678 		mdb_index_page_init(ipg);
679 		chain->cur_depth = 1;
680 		ipg->pg = idx->first_pg;
681 		if (!(ipg = mdb_find_next_leaf(mdb, idx, chain)))
682 			return 0;
683 	} else {
684 		ipg = &(chain->pages[chain->cur_depth - 1]);
685 		ipg->len = 0;
686 	}
687 
688 	mdb_read_pg(mdb, ipg->pg);
689 
690 	return ipg;
691 }
692 /*
693  * unwind the stack and search for new leaf node
694  */
695 MdbIndexPage *
mdb_index_unwind(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain)696 mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
697 {
698 	MdbIndexPage *ipg;
699 
700 	//printf("page %lu finished\n",ipg->pg);
701 	if (chain->cur_depth==1) {
702 		//printf("cur_depth == 1 we're out\n");
703 		return NULL;
704 	}
705 	/*
706 	* unwind the stack until we find something or reach
707 	* the top.
708 	*/
709 	ipg = NULL;
710 	while (chain->cur_depth>1 && ipg==NULL) {
711 		//printf("chain depth %d\n", chain->cur_depth);
712 		chain->cur_depth--;
713 		ipg = mdb_find_next_leaf(mdb, idx, chain);
714 		if (ipg) mdb_index_find_next_on_page(mdb, ipg);
715 	}
716 	if (chain->cur_depth==1) {
717 		//printf("last leaf %lu\n", chain->last_leaf_found);
718 		return NULL;
719 	}
720 	return ipg;
721 }
722 /*
723  * the main index function.
724  * caller provides an index chain which is the current traversal of index
725  * pages from the root page to the leaf.  Initially passed as blank,
726  * mdb_index_find_next will store it's state information here. Each invocation
727  * then picks up where the last one left off, allowing us to scroll through
728  * the index one by one.
729  *
730  * Sargs are applied here but also need to be applied on the whole row b/c
731  * text columns may return false positives due to hashing and non-index
732  * columns with sarg values can't be tested here.
733  */
734 int
mdb_index_find_next(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain,guint32 * pg,guint16 * row)735 mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 *pg, guint16 *row)
736 {
737 	MdbIndexPage *ipg;
738 	int passed = 0;
739 	int idx_sz;
740 	int idx_start = 0;
741 	MdbColumn *col;
742 	guint32 pg_row;
743 
744 	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
745 
746 	/*
747 	 * loop while the sargs don't match
748 	 */
749 	do {
750 		ipg->len = 0;
751 		/*
752 		 * if no more rows on this leaf, try to find a new leaf
753 		 */
754 		if (!mdb_index_find_next_on_page(mdb, ipg)) {
755 			if (!chain->clean_up_mode) {
756 				if (!(ipg = mdb_index_unwind(mdb, idx, chain)))
757 					chain->clean_up_mode = 1;
758 			}
759 			if (chain->clean_up_mode) {
760 				//fprintf(stdout,"in cleanup mode\n");
761 
762 				if (!chain->last_leaf_found) return 0;
763 				mdb_read_pg(mdb, chain->last_leaf_found);
764 				chain->last_leaf_found = mdb_get_int32(
765 					(char *)mdb->pg_buf, 0x0c);
766 				//printf("next leaf %lu\n", chain->last_leaf_found);
767 				mdb_read_pg(mdb, chain->last_leaf_found);
768 				/* reuse the chain for cleanup mode */
769 				chain->cur_depth = 1;
770 				ipg = &chain->pages[0];
771 				mdb_index_page_init(ipg);
772 				ipg->pg = chain->last_leaf_found;
773 				//printf("next on page %d\n",
774 				if (!mdb_index_find_next_on_page(mdb, ipg))
775 					return 0;
776 			}
777 		}
778 		pg_row = mdb_get_int32_msb((char *)mdb->pg_buf, ipg->offset + ipg->len - 4);
779 		*row = pg_row & 0xff;
780 		*pg = pg_row >> 8;
781 		//printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len);
782 		col=g_ptr_array_index(idx->table->columns,idx->key_col_num[0]-1);
783 		idx_sz = mdb_col_fixed_size(col);
784 		/* handle compressed indexes, single key indexes only? */
785 		if (idx->num_keys==1 && idx_sz>0 && ipg->len - 4 < idx_sz) {
786 			//printf("short index found\n");
787 			//mdb_buffer_dump(ipg->cache_value, 0, idx_sz);
788 			memcpy(&ipg->cache_value[idx_sz - (ipg->len - 4)], &mdb->pg_buf[ipg->offset], ipg->len);
789 			//mdb_buffer_dump(ipg->cache_value, 0, idx_sz);
790 		} else {
791 			idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
792 			memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz);
793 		}
794 
795 		//idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
796 		passed = mdb_index_test_sargs(mdb, idx, (char *)(ipg->cache_value), idx_sz);
797 
798 		ipg->offset += ipg->len;
799 	} while (!passed);
800 
801 	//fprintf(stdout,"len = %d pos %d\n", ipg->len, ipg->mask_pos);
802 	//mdb_buffer_dump(mdb->pg_buf, ipg->offset, ipg->len);
803 
804 	return ipg->len;
805 }
806 /*
807  * XXX - FIX ME
808  * This function is grossly inefficient.  It scans the entire index building
809  * an IndexChain to a specific row.  We should be checking the index pages
810  * for matches against the indexed fields to find the proper leaf page, but
811  * getting it working first and then make it fast!
812  */
813 int
mdb_index_find_row(MdbHandle * mdb,MdbIndex * idx,MdbIndexChain * chain,guint32 pg,guint16 row)814 mdb_index_find_row(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 pg, guint16 row)
815 {
816 	MdbIndexPage *ipg;
817 	int passed = 0;
818 	guint32 pg_row = (pg << 8) | (row & 0xff);
819 	guint32 datapg_row;
820 
821 	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
822 
823 	do {
824 		ipg->len = 0;
825 		/*
826 		 * if no more rows on this leaf, try to find a new leaf
827 		 */
828 		if (!mdb_index_find_next_on_page(mdb, ipg)) {
829 			/* back to top? We're done */
830 			if (chain->cur_depth==1)
831 				return 0;
832 
833 			/*
834 			 * unwind the stack until we find something or reach
835 			 * the top.
836 			 */
837 			while (chain->cur_depth>1) {
838 				chain->cur_depth--;
839 				if (!(ipg = mdb_find_next_leaf(mdb, idx, chain)))
840 					return 0;
841 				mdb_index_find_next_on_page(mdb, ipg);
842 			}
843 			if (chain->cur_depth==1)
844 				return 0;
845 		}
846 		/* test row and pg */
847 		datapg_row = mdb_get_int32_msb((char *)mdb->pg_buf, ipg->offset + ipg->len - 4);
848 		if (pg_row == datapg_row) {
849 			passed = 1;
850 		}
851 		ipg->offset += ipg->len;
852 	} while (!passed);
853 
854 	/* index chain from root to leaf should now be in "chain" */
855 	return 1;
856 }
857 
mdb_index_walk(MdbTableDef * table,MdbIndex * idx)858 void mdb_index_walk(MdbTableDef *table, MdbIndex *idx)
859 {
860 /*
861 	MdbHandle *mdb = table->entry->mdb;
862 	int cur_pos = 0;
863 	unsigned char marker;
864 	MdbColumn *col;
865 	unsigned int i;
866 
867 	if (idx->num_keys!=1) return;
868 
869 	mdb_read_pg(mdb, idx->first_pg);
870 	cur_pos = 0xf8;
871 
872 	for (i=0;i<idx->num_keys;i++) {
873 		marker = mdb->pg_buf[cur_pos++];
874 		col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
875 		//printf("column %d coltype %d col_size %d (%d)\n",i,col->col_type, mdb_col_fixed_size(col), col->col_size);
876 	}
877 */
878 }
879 void
mdb_index_dump(MdbTableDef * table,MdbIndex * idx)880 mdb_index_dump(MdbTableDef *table, MdbIndex *idx)
881 {
882 	unsigned int i;
883 	MdbColumn *col;
884 
885 	fprintf(stdout,"index number     %d\n", idx->index_num);
886 	fprintf(stdout,"index name       %s\n", idx->name);
887 	fprintf(stdout,"index first page %d\n", idx->first_pg);
888 	fprintf(stdout,"index rows       %d\n", idx->num_rows);
889 	if (idx->index_type==1) fprintf(stdout,"index is a primary key\n");
890 	for (i=0;i<idx->num_keys;i++) {
891 		col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
892 		fprintf(stdout,"Column %s(%d) Sorted %s Unique: %s\n",
893 			col->name,
894 			idx->key_col_num[i],
895 			idx->key_col_order[i]==MDB_ASC ? "ascending" : "descending",
896 			idx->flags & MDB_IDX_UNIQUE ? "Yes" : "No"
897 			);
898 	}
899 	mdb_index_walk(table, idx);
900 }
901 /*
902  * compute_cost tries to assign a cost to a given index using the sargs
903  * available in this query.
904  *
905  * Indexes with no matching sargs are assigned 0
906  * Unique indexes are preferred over non-uniques
907  * Operator preference is equal, like, isnull, others
908  */
mdb_index_compute_cost(MdbTableDef * table,MdbIndex * idx)909 int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
910 {
911 	unsigned int i;
912 	MdbColumn *col;
913 	MdbSarg *sarg = NULL;
914 	int not_all_equal = 0;
915 
916 	if (!idx->num_keys) return 0;
917 	if (idx->num_keys > 1) {
918 		for (i=0;i<idx->num_keys;i++) {
919 			col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
920 			if (col->sargs) sarg = g_ptr_array_index (col->sargs, 0);
921 			if (!sarg || sarg->op != MDB_EQUAL) not_all_equal++;
922 		}
923 	}
924 
925 	col=g_ptr_array_index(table->columns,idx->key_col_num[0]-1);
926 	/*
927 	 * if this is the first key column and there are no sargs,
928 	 * then this index is useless.
929 	 */
930 	if (!col->num_sargs) return 0;
931 
932 	sarg = g_ptr_array_index (col->sargs, 0);
933 
934 	/*
935 	 * a like with a wild card first is useless as a sarg */
936 	if (sarg->op == MDB_LIKE && sarg->value.s[0]=='%')
937 		return 0;
938 
939 	/*
940 	 * this needs a lot of tweaking.
941 	 */
942 	if (idx->flags & MDB_IDX_UNIQUE) {
943 		if (idx->num_keys == 1) {
944 			//printf("op is %d\n", sarg->op);
945 			switch (sarg->op) {
946 				case MDB_EQUAL:
947 					return 1; break;
948 				case MDB_LIKE:
949 					return 4; break;
950 				case MDB_ISNULL:
951 					return 12; break;
952 				default:
953 					return 8; break;
954 			}
955 		} else {
956 			switch (sarg->op) {
957 				case MDB_EQUAL:
958 					if (not_all_equal) return 2;
959 					else return 1;
960 					break;
961 				case MDB_LIKE:
962 					return 6; break;
963 				case MDB_ISNULL:
964 					return 12; break;
965 				default:
966 					return 9; break;
967 			}
968 		}
969 	} else {
970 		if (idx->num_keys == 1) {
971 			switch (sarg->op) {
972 				case MDB_EQUAL:
973 					return 2; break;
974 				case MDB_LIKE:
975 					return 5; break;
976 				case MDB_ISNULL:
977 					return 12; break;
978 				default:
979 					return 10; break;
980 			}
981 		} else {
982 			switch (sarg->op) {
983 				case MDB_EQUAL:
984 					if (not_all_equal) return 3;
985 					else return 2;
986 					break;
987 				case MDB_LIKE:
988 					return 7; break;
989 				case MDB_ISNULL:
990 					return 12; break;
991 				default:
992 					return 11; break;
993 			}
994 		}
995 	}
996 	return 0;
997 }
998 /*
999  * choose_index runs mdb_index_compute_cost for each available index and picks
1000  * the best.
1001  *
1002  * Returns strategy to use (table scan, or index scan)
1003  */
1004 MdbStrategy
mdb_choose_index(MdbTableDef * table,int * choice)1005 mdb_choose_index(MdbTableDef *table, int *choice)
1006 {
1007 	unsigned int i;
1008 	MdbIndex *idx;
1009 	int cost = 0;
1010 	int least = 99;
1011 
1012 	*choice = -1;
1013 	for (i=0;i<table->num_idxs;i++) {
1014 		idx = g_ptr_array_index (table->indices, i);
1015 		cost = mdb_index_compute_cost(table, idx);
1016 		//printf("cost for %s is %d\n", idx->name, cost);
1017 		if (cost && cost < least) {
1018 			least = cost;
1019 			*choice = i;
1020 		}
1021 	}
1022 	/* and the winner is: *choice */
1023 	if (least==99) return MDB_TABLE_SCAN;
1024 	return MDB_INDEX_SCAN;
1025 }
1026 void
mdb_index_scan_init(MdbHandle * mdb,MdbTableDef * table)1027 mdb_index_scan_init(MdbHandle *mdb, MdbTableDef *table)
1028 {
1029 	int i;
1030 
1031 	if (mdb_get_option(MDB_USE_INDEX) && mdb_choose_index(table, &i) == MDB_INDEX_SCAN) {
1032 		table->strategy = MDB_INDEX_SCAN;
1033 		table->scan_idx = g_ptr_array_index (table->indices, i);
1034 		table->chain = g_malloc0(sizeof(MdbIndexChain));
1035 		table->mdbidx = mdb_clone_handle(mdb);
1036 		mdb_read_pg(table->mdbidx, table->scan_idx->first_pg);
1037 		//printf("best index is %s\n",table->scan_idx->name);
1038 	}
1039 	//printf("TABLE SCAN? %d\n", table->strategy);
1040 }
1041 void
mdb_index_scan_free(MdbTableDef * table)1042 mdb_index_scan_free(MdbTableDef *table)
1043 {
1044 	if (table->chain) {
1045 		g_free(table->chain);
1046 		table->chain = NULL;
1047 	}
1048 	if (table->mdbidx) {
1049 		mdb_close(table->mdbidx);
1050 		table->mdbidx = NULL;
1051 	}
1052 }
1053 
mdb_free_indices(GPtrArray * indices)1054 void mdb_free_indices(GPtrArray *indices)
1055 {
1056 	unsigned int i;
1057 
1058 	if (!indices) return;
1059 	for (i=0; i<indices->len; i++)
1060 		g_free (g_ptr_array_index(indices, i));
1061 	g_ptr_array_free(indices, TRUE);
1062 }
1063