1 /* MDB Tools - A library for reading MS Access database files
2  * Copyright (C) 2000 Brian Bruns
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 #include "mdbtools.h"
20 
21 #ifdef DMALLOC
22 #include "dmalloc.h"
23 #endif
24 
25 /*
26 typedef struct {
27 	int		pg_size;
28 	guint16		row_count_offset;
29 	guint16		tab_num_rows_offset;
30 	guint16		tab_num_cols_offset;
31 	guint16		tab_num_idxs_offset;
32 	guint16		tab_num_ridxs_offset;
33 	guint16		tab_usage_map_offset;
34 	guint16		tab_first_dpg_offset;
35 	guint16		tab_cols_start_offset;
36 	guint16		tab_ridx_entry_size;
37 	guint16		col_flags_offset;
38 	guint16		col_size_offset;
39 	guint16		col_num_offset;
40 	guint16		tab_col_entry_size;
41 	guint16         tab_free_map_offset;
42 	guint16         tab_col_offset_var;
43 	guint16         tab_col_offset_fixed;
44 	guint16         tab_row_col_num_offset;
45 } MdbFormatConstants;
46 */
47 MdbFormatConstants MdbJet4Constants = {
48 	4096, 0x0c, 16, 45, 47, 51, 55, 56, 63, 12, 15, 23, 5, 25, 59, 7, 21, 9
49 };
50 MdbFormatConstants MdbJet3Constants = {
51 	2048, 0x08, 12, 25, 27, 31, 35, 36, 43, 8, 13, 16, 1, 18, 39, 3, 14, 5
52 };
53 
/*
 * RC4 cipher context: the 256-entry permutation table together with the two
 * running indices that RC4() saves back after each call.
 */
typedef struct _RC4_KEY
{
	unsigned char state[256];	/* permutation table, filled by RC4_set_key() */
	unsigned char x, y;		/* stream indices, reset to 0 by RC4_set_key() */
} RC4_KEY;
60 
/*
 * Exchange the two bytes that x and y point to.
 *
 * The macro uses a variable named `t` as scratch storage, so one must be
 * declared in the calling scope.  The body is wrapped in do { } while (0) so
 * that the macro expands to exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define swap_byte(x,y) do { t = *(x); *(x) = *(y); *(y) = t; } while (0)
62 
63 static ssize_t _mdb_read_pg(MdbHandle *mdb, void *pg_buf, unsigned long pg);
64 
/*
 * Run the RC4 key schedule.
 *
 * @key:          context to initialise; its state table and x/y indices are
 *                overwritten
 * @key_data_len: number of key bytes; used as a modulus, so it must not be 0
 * @key_data_ptr: the key bytes
 */
static void RC4_set_key(RC4_KEY *key, int key_data_len, unsigned char *key_data_ptr)
{
	unsigned char *perm = key->state;
	unsigned char key_pos = 0;
	unsigned char j = 0;
	int i;

	/* start from the identity permutation */
	for (i = 0; i < 256; i++)
		perm[i] = (unsigned char)i;

	key->x = 0;
	key->y = 0;

	/* mix the key bytes into the permutation, cycling through the key */
	for (i = 0; i < 256; i++) {
		unsigned char held;

		j = (unsigned char)(key_data_ptr[key_pos] + perm[i] + j);

		held = perm[i];
		perm[i] = perm[j];
		perm[j] = held;

		key_pos = (unsigned char)((key_pos + 1) % key_data_len);
	}
}
86 
87 /*
88  * this algorithm does 'encrypt in place' instead of inbuff/outbuff
89  * note also: encryption and decryption use same routine
90  * implementation supplied by (Adam Back) at <adam at cypherspace dot org>
91  */
92 
RC4(RC4_KEY * key,int buffer_len,unsigned char * buff)93 static void RC4(RC4_KEY *key, int buffer_len, unsigned char * buff)
94 {
95 	unsigned char t;
96 	unsigned char x;
97 	unsigned char y;
98 	unsigned char* state;
99 	unsigned char xorIndex;
100 	short counter;
101 
102 	x = key->x;
103 	y = key->y;
104 	state = &key->state[0];
105 	for(counter = 0; counter < buffer_len; counter++) {
106 		x = (x + 1) % 256;
107 		y = (state[x] + y) % 256;
108 		swap_byte(&state[x], &state[y]);
109 		xorIndex = (state[x] + state[y]) % 256;
110 		buff[counter] ^= state[xorIndex];
111 	}
112 	key->x = x;
113 	key->y = y;
114 }
115 
116 
117 /**
118  * mdb_find_file:
119  * @filename: path to MDB (database) file
120  *
121  * Finds and returns the absolute path to an MDB file.  Function will first try
122  * to fstat file as passed, then search through the $MDBPATH if not found.
123  *
124  * Return value: gchar pointer to absolute path. Caller is responsible for
125  * freeing.
126  **/
127 
mdb_find_file(const char * file_name)128 static char *mdb_find_file(const char *file_name)
129 {
130 	struct stat status;
131 	gchar *mdbpath, **dir, *tmpfname;
132 	unsigned int i = 0;
133 
134 	/* try the provided file name first */
135 	if (!stat(file_name, &status)) {
136 		char *result;
137 		result = g_strdup(file_name);
138 		if (!result)
139 			fprintf(stderr, "Can't alloc filename\n");
140 		return result;
141 	}
142 
143 	/* Now pull apart $MDBPATH and try those */
144 	mdbpath = (gchar *) getenv("MDBPATH");
145 	/* no path, can't find file */
146 	if (!mdbpath || !strlen(mdbpath)) return NULL;
147 
148 	dir = g_strsplit(mdbpath, ":", 0);
149 	while (dir[i]) {
150 		if (!strlen(dir[i])) continue;
151 		tmpfname = g_strconcat(dir[i++], "/", file_name, NULL);
152 		if (!stat(tmpfname, &status)) {
153 			g_strfreev(dir);
154 			return tmpfname;
155 		}
156 		g_free(tmpfname);
157 	}
158 	g_strfreev(dir);
159 	return NULL;
160 }
161 
162 /**
163  * mdb_set_encoding:
164  * @mdb: Handle to MDB database file
165  * @encoding_name: encoding name for MDB (database) file in JET3 version.
166  *                 A copy of the string will be created.
167  *
168  * Sets encoding name for MDB (database) file in JET3 version.
169  * JET3 databases have no usincode support but only ANSI code page (e.g. CP1252)
170  * (not ISO), so you need to decide what code page strings in the MDB file are encoded in.
171  *
172  * Use this function after mdb_open()) but BEFORE any operation which reads text strings
173  * from the MDB file.
174  * "MDB_JET3_CHARSET" environment variable has priority over this setting.
175  *
176  **/
mdb_set_encoding(MdbHandle * mdb,const char * encoding_name)177 void mdb_set_encoding(MdbHandle *mdb, const char *encoding_name)
178 {
179 #ifdef HAVE_ICONV
180 	mdb_iconv_close(mdb);
181 	g_free(mdb->jet3_iconv_code);
182 	mdb->jet3_iconv_code = g_strdup(encoding_name);
183 	mdb_iconv_init(mdb);
184 #endif
185 }
186 
187 /**
188  * mdb_open:
189  * @filename: path to MDB (database) file
190  * @flags: MDB_NOFLAGS for read-only, MDB_WRITABLE for read/write
191  *
192  * Opens an MDB file and returns an MdbHandle to it.  MDB File may be relative
193  * to the current directory, a full path to the file, or relative to a
194  * component of $MDBPATH.
195  *
196  * Return value: pointer to MdbHandle structure.
197  **/
mdb_open(const char * filename,MdbFileFlags flags)198 MdbHandle *mdb_open(const char *filename, MdbFileFlags flags)
199 {
200 	MdbHandle *mdb;
201 	int key[] = {0x86, 0xfb, 0xec, 0x37, 0x5d, 0x44, 0x9c, 0xfa, 0xc6, 0x5e, 0x28, 0xe6, 0x13, 0xb6};
202 	int j, pos;
203 	int open_flags;
204 
205 	mdb = (MdbHandle *) g_malloc0(sizeof(MdbHandle));
206 #if !MDB_NO_BACKENDS
207 	mdb_set_default_backend(mdb, "access");
208 #endif
209 #ifdef HAVE_ICONV
210 	mdb->jet3_iconv_code = 0;
211 	mdb->iconv_in = (iconv_t)-1;
212 	mdb->iconv_out = (iconv_t)-1;
213 #endif
214 	/* need something to bootstrap with, reassign after page 0 is read */
215 	mdb->fmt = &MdbJet3Constants;
216 	mdb->f = (MdbFile *) g_malloc0(sizeof(MdbFile));
217 	mdb->f->refs = 1;
218 	mdb->f->fd = -1;
219 	mdb->f->filename = mdb_find_file(filename);
220 	if (!mdb->f->filename) {
221 		fprintf(stderr, "File not found\n");
222 		mdb_close(mdb);
223 		return NULL;
224 	}
225 	if (flags & MDB_WRITABLE) {
226 		mdb->f->writable = TRUE;
227 		open_flags = O_RDWR;
228 	} else {
229 		open_flags = O_RDONLY;
230 	}
231 
232 #ifdef _WIN32
233 	open_flags |= O_BINARY;
234 #endif
235 
236 	mdb->f->fd = open(mdb->f->filename, open_flags);
237 
238 	if (mdb->f->fd==-1) {
239 		fprintf(stderr,"Couldn't open file %s\n",mdb->f->filename);
240 		mdb_close(mdb);
241 		return NULL;
242 	}
243 	if (!mdb_read_pg(mdb, 0)) {
244 		fprintf(stderr,"Couldn't read first page.\n");
245 		mdb_close(mdb);
246 		return NULL;
247 	}
248 	if (mdb->pg_buf[0] != 0) {
249 		mdb_close(mdb);
250 		return NULL;
251 	}
252 	mdb->f->jet_version = mdb_get_int32((char *)mdb->pg_buf, 0x14);
253 	switch(mdb->f->jet_version) {
254 	case MDB_VER_JET3:
255 		mdb->fmt = &MdbJet3Constants;
256 		break;
257 	case MDB_VER_JET4:
258 	case MDB_VER_ACCDB_2007:
259 	case MDB_VER_ACCDB_2010:
260 		mdb->fmt = &MdbJet4Constants;
261 		break;
262 	default:
263 		fprintf(stderr,"Unknown Jet version.\n");
264 		mdb_close(mdb);
265 		return NULL;
266 	}
267 	mdb->f->db_key = mdb_get_int32((char *)mdb->pg_buf, 0x3e);
268 	/* I don't know if this value is valid for some versions?
269 	 * it doesn't seem to be valid for the databases I have
270 	 *
271 	 * f->db_key ^= 0xe15e01b9;
272 	 */
273 	mdb->f->db_key ^= 0x4ebc8afb;
274 	/* fprintf(stderr, "Encrypted file, RC4 key seed= %d\n", mdb->f->db_key); */
275 	if (mdb->f->db_key) {
276 		/* write is not supported for encrypted files yet */
277 		mdb->f->writable = FALSE;
278 		/* that should be enough, but reopen the file read only just to be
279 		 * sure we don't write invalid data */
280 		close(mdb->f->fd);
281 		open_flags = O_RDONLY;
282 #ifdef _WIN32
283 		open_flags |= O_BINARY;
284 #endif
285 		mdb->f->fd = open(mdb->f->filename, open_flags);
286 		if (mdb->f->fd==-1) {
287 			fprintf(stderr, "Couldn't ropen file %s in read only\n", mdb->f->filename);
288 			mdb_close(mdb);
289 			return NULL;
290 		}
291 	}
292 
293 	/* get the db password located at 0x42 bytes into the file */
294 	for (pos=0;pos<14;pos++) {
295 		j = mdb_get_int32((char *)mdb->pg_buf, 0x42+pos);
296 		j ^= key[pos];
297 		if ( j != 0)
298 			mdb->f->db_passwd[pos] = j;
299 		else
300 			mdb->f->db_passwd[pos] = '\0';
301 	}
302 
303 	mdb_iconv_init(mdb);
304 
305 	return mdb;
306 }
307 
308 /**
309  * mdb_close:
310  * @mdb: Handle to open MDB database file
311  *
312  * Dereferences MDB file, closes if reference count is 0, and destroys handle.
313  *
314  **/
315 void
mdb_close(MdbHandle * mdb)316 mdb_close(MdbHandle *mdb)
317 {
318 	if (!mdb) return;
319 	mdb_free_catalog(mdb);
320 #if !MDB_NO_STATS
321 	g_free(mdb->stats);
322 #endif
323 #if !MDB_NO_BACKENDS
324 	g_free(mdb->backend_name);
325 #endif
326 	if (mdb->f) {
327 		if (mdb->f->refs > 1) {
328 			mdb->f->refs--;
329 		} else {
330 			if (mdb->f->fd != -1) close(mdb->f->fd);
331 			g_free(mdb->f->filename);
332 			g_free(mdb->f);
333 		}
334 	}
335 
336 	mdb_iconv_close(mdb);
337 
338 #ifdef HAVE_ICONV
339 	g_free(mdb->jet3_iconv_code);
340 #endif
341 	g_free(mdb);
342 }
343 /**
344  * mdb_clone_handle:
345  * @mdb: Handle to open MDB database file
346  *
347  * Clones an existing database handle.  Cloned handle shares the file descriptor
348  * but has its own page buffer, page position, and similar internal variables.
349  *
350  * Return value: new handle to the database.
351  */
mdb_clone_handle(MdbHandle * mdb)352 MdbHandle *mdb_clone_handle(MdbHandle *mdb)
353 {
354 	MdbHandle *newmdb;
355 	MdbCatalogEntry *entry, *data;
356 	unsigned int i;
357 
358 	newmdb = (MdbHandle *) g_memdup(mdb, sizeof(MdbHandle));
359 #if !MDB_NO_STATS
360 	newmdb->stats = NULL;
361 #endif
362 	newmdb->catalog = g_ptr_array_new();
363 	for (i=0;i<mdb->num_catalog;i++) {
364 		entry = g_ptr_array_index(mdb->catalog,i);
365 		data = g_memdup(entry,sizeof(MdbCatalogEntry));
366 		g_ptr_array_add(newmdb->catalog, data);
367 	}
368 #if !MDB_NO_BACKENDS
369 	newmdb->backend_name = NULL;
370 #endif
371 	if (mdb->f) {
372 		mdb->f->refs++;
373 	}
374 #ifdef HAVE_ICONV
375 	newmdb->jet3_iconv_code = g_strdup(mdb->jet3_iconv_code);
376 #endif
377 	mdb_iconv_init(newmdb);
378 
379 	return newmdb;
380 }
381 
382 /*
383 ** mdb_read a wrapper for read that bails if anything is wrong
384 */
mdb_read_pg(MdbHandle * mdb,unsigned long pg)385 ssize_t mdb_read_pg(MdbHandle *mdb, unsigned long pg)
386 {
387 	ssize_t len;
388 
389 	if (pg && mdb->cur_pg == pg) return mdb->fmt->pg_size;
390 
391 	len = _mdb_read_pg(mdb, mdb->pg_buf, pg);
392 	//fprintf(stderr, "read page %d type %02x\n", pg, mdb->pg_buf[0]);
393 	mdb->cur_pg = pg;
394 	/* kan - reset the cur_pos on a new page read */
395 	mdb->cur_pos = 0; /* kan */
396 	return len;
397 }
/**
 * mdb_read_alt_pg:
 * @mdb: Handle to open MDB database file
 * @pg: number of the page to load
 *
 * Loads page @pg into the handle's alternate page buffer.  Unlike
 * mdb_read_pg() this neither consults nor updates the current page number
 * and position.
 *
 * Return value: whatever _mdb_read_pg() returns (0 on failure).
 */
ssize_t mdb_read_alt_pg(MdbHandle *mdb, unsigned long pg)
{
	return _mdb_read_pg(mdb, mdb->alt_pg_buf, pg);
}
/*
 * Read one page from the database file into pg_buf.
 *
 * @mdb:    handle supplying the file descriptor, page size and RC4 key seed
 * @pg_buf: destination buffer, at least mdb->fmt->pg_size bytes
 * @pg:     page number; the byte offset is pg * pg_size
 *
 * If the handle has a non-zero db_key, every page except page 0 is decrypted
 * in place with RC4, keyed with the four little-endian bytes of (db_key ^ pg).
 *
 * Returns the number of bytes read (a full page) on success.  Returns 0 when
 * fstat/lseek/read fails, when the offset lies past the end of the file, or
 * when fewer than pg_size bytes could be read.
 */
static ssize_t _mdb_read_pg(MdbHandle *mdb, void *pg_buf, unsigned long pg)
{
	ssize_t len;
	struct stat status;
	/* Multiply in a wide unsigned type so the product is not truncated
	 * where unsigned long is narrower than off_t, then make sure the
	 * result really fits into off_t. */
	unsigned long long wide_offset =
		(unsigned long long)pg * (unsigned long long)mdb->fmt->pg_size;
	off_t offset = (off_t)wide_offset;

	if (fstat(mdb->f->fd, &status) != 0) {
		perror("fstat");
		return 0;
	}
	if (offset < 0 || (unsigned long long)offset != wide_offset
	    || status.st_size < offset) {
		fprintf(stderr,"offset %llu is beyond EOF\n",wide_offset);
		return 0;
	}
#if !MDB_NO_STATS
	if (mdb->stats && mdb->stats->collect)
		mdb->stats->pg_reads++;
#endif
	if (lseek(mdb->f->fd, offset, SEEK_SET) == -1) {
		perror("lseek");
		return 0;
	}
	len = read(mdb->f->fd,pg_buf,mdb->fmt->pg_size);
	if (len==-1) {
		perror("read");
		return 0;
	}
	else if (len<mdb->fmt->pg_size) {
		/* fprintf(stderr,"EOF reached %d bytes returned.\n",len, mdb->fmt->pg_size); */
		return 0;
	}
	/*
	 * unencrypt the page if necessary.
	 * it might make sense to cache the unencrypted data blocks?
	 */
	if (pg != 0 && mdb->f->db_key != 0)
	{
		RC4_KEY rc4_key;
		unsigned long page_key = mdb->f->db_key ^ pg;
		unsigned char key_bytes[4];

		/* spell the key out byte by byte so it does not depend on the
		 * host's byte order */
		key_bytes[0] = (unsigned char)(page_key & 0xff);
		key_bytes[1] = (unsigned char)((page_key >> 8) & 0xff);
		key_bytes[2] = (unsigned char)((page_key >> 16) & 0xff);
		key_bytes[3] = (unsigned char)((page_key >> 24) & 0xff);

		RC4_set_key(&rc4_key, 4, key_bytes);
		RC4(&rc4_key, mdb->fmt->pg_size, pg_buf);
	}

	return len;
}
/**
 * mdb_swap_pgbuf:
 * @mdb: Handle to open MDB database file
 *
 * Exchanges the first MDB_PGSIZE bytes of the primary and alternate page
 * buffers, going through a temporary buffer on the stack.
 */
void mdb_swap_pgbuf(MdbHandle *mdb)
{
	char scratch[MDB_PGSIZE];

	memcpy(scratch, mdb->pg_buf, sizeof scratch);
	memcpy(mdb->pg_buf, mdb->alt_pg_buf, sizeof scratch);
	memcpy(mdb->alt_pg_buf, scratch, sizeof scratch);
}
458 
459 
/**
 * mdb_get_byte:
 * @buf: buffer to read from
 * @offset: byte index into @buf
 *
 * Return value: the byte at @buf[@offset].  No bounds checking is done.
 */
unsigned char mdb_get_byte(void *buf, int offset)
{
	const unsigned char *bytes = buf;

	return *(bytes + offset);
}
/**
 * mdb_pg_get_byte:
 * @mdb: Handle to open MDB database file
 * @offset: byte index into the current page buffer
 *
 * Reads one byte from the current page and advances the current position
 * by 1.
 *
 * Return value: the byte at @offset, or (unsigned char)-1 when @offset lies
 * outside the page (the position is not advanced in that case).
 */
unsigned char mdb_pg_get_byte(MdbHandle *mdb, int offset)
{
	/* compare against pg_size - 1 instead of computing offset + 1, which
	 * would overflow for offsets close to INT_MAX and defeat the check */
	if (offset < 0 || offset > (int)mdb->fmt->pg_size - 1) return -1;
	mdb->cur_pos++;
	return mdb->pg_buf[offset];
}
470 
/**
 * mdb_get_int16:
 * @buf: buffer to read from
 * @offset: byte index of the first of the two bytes
 *
 * Decodes an unsigned 16-bit little-endian integer.  No bounds checking is
 * done.
 *
 * Return value: the decoded value, in the range 0..65535.
 */
int mdb_get_int16(char *buf, int offset)
{
	const unsigned char *p = (const unsigned char *)buf + offset;

	/* low byte first, independent of the host's byte order */
	return (int)(p[0] | (p[1] << 8));
}
/**
 * mdb_pg_get_int16:
 * @mdb: Handle to open MDB database file
 * @offset: byte index into the current page buffer
 *
 * Reads a 16-bit little-endian integer from the current page via
 * mdb_get_int16() and advances the current position by 2.
 *
 * Return value: the decoded value, or -1 when the two bytes would not lie
 * entirely inside the page (the position is not advanced in that case).
 */
int mdb_pg_get_int16(MdbHandle *mdb, int offset)
{
	/* compare against pg_size - 2 instead of computing offset + 2, which
	 * would overflow for offsets close to INT_MAX and defeat the check */
	if (offset < 0 || offset > (int)mdb->fmt->pg_size - 2) return -1;
	mdb->cur_pos+=2;
	return mdb_get_int16((char *)mdb->pg_buf, offset);
}
483 
/**
 * mdb_get_int32_msb:
 * @buf: buffer to read from
 * @offset: byte index of the first of the four bytes
 *
 * Decodes a signed 32-bit big-endian (most significant byte first) integer.
 * No bounds checking is done.
 *
 * Return value: the decoded value, sign-extended to long.
 */
long mdb_get_int32_msb(char *buf, int offset)
{
	const unsigned char *p = (const unsigned char *)buf + offset;
	unsigned long bits = ((unsigned long)p[0] << 24)
	                   | ((unsigned long)p[1] << 16)
	                   | ((unsigned long)p[2] << 8)
	                   |  (unsigned long)p[3];

	/* reinterpret the 32-bit pattern as two's complement without relying
	 * on an out-of-range unsigned-to-signed conversion */
	if (bits > 0x7fffffffUL)
		return -(long)(0xffffffffUL - bits) - 1;
	return (long)bits;
}
/**
 * mdb_get_int32:
 * @buf: buffer to read from
 * @offset: byte index of the first of the four bytes
 *
 * Decodes a signed 32-bit little-endian integer.  No bounds checking is
 * done.
 *
 * Return value: the decoded value, sign-extended to long.
 */
long mdb_get_int32(char *buf, int offset)
{
	const unsigned char *p = (const unsigned char *)buf + offset;
	unsigned long bits =  (unsigned long)p[0]
	                   | ((unsigned long)p[1] << 8)
	                   | ((unsigned long)p[2] << 16)
	                   | ((unsigned long)p[3] << 24);

	/* reinterpret the 32-bit pattern as two's complement without relying
	 * on an out-of-range unsigned-to-signed conversion */
	if (bits > 0x7fffffffUL)
		return -(long)(0xffffffffUL - bits) - 1;
	return (long)bits;
}
/**
 * mdb_pg_get_int32:
 * @mdb: Handle to open MDB database file
 * @offset: byte index into the current page buffer
 *
 * Reads a 32-bit little-endian integer from the current page via
 * mdb_get_int32() and advances the current position by 4.
 *
 * Return value: the decoded value, or -1 when the four bytes would not lie
 * entirely inside the page (the position is not advanced in that case).
 */
long mdb_pg_get_int32(MdbHandle *mdb, int offset)
{
	/* compare against pg_size - 4 instead of computing offset + 4, which
	 * would overflow for offsets close to INT_MAX and defeat the check */
	if (offset < 0 || offset > (int)mdb->fmt->pg_size - 4) return -1;
	mdb->cur_pos+=4;
	return mdb_get_int32((char *)mdb->pg_buf, offset);
}
502 
mdb_get_single(char * buf,int offset)503 float mdb_get_single(char *buf, int offset)
504 {
505 	union {guint32 g; float f;} f;
506 	memcpy(&f, buf + offset, 4);
507 	f.g = GUINT32_FROM_LE(f.g);
508 	return f.f;
509 }
/**
 * mdb_pg_get_single:
 * @mdb: Handle to open MDB database file
 * @offset: byte index into the current page buffer
 *
 * Reads a 4-byte float from the current page via mdb_get_single() and
 * advances the current position by 4.
 *
 * Return value: the decoded float, or -1 when the four bytes would not lie
 * entirely inside the page (the position is not advanced in that case).
 */
float mdb_pg_get_single(MdbHandle *mdb, int offset)
{
	/* compare against pg_size - 4 instead of computing offset + 4, which
	 * would overflow for offsets close to INT_MAX and defeat the check */
	if (offset < 0 || offset > (int)mdb->fmt->pg_size - 4) return -1;
	mdb->cur_pos+=4;
	return mdb_get_single((char *)mdb->pg_buf, offset);
}
516 
mdb_get_double(char * buf,int offset)517 double mdb_get_double(char *buf, int offset)
518 {
519 	union {guint64 g; double d;} d;
520 	memcpy(&d, buf + offset, 8);
521 	d.g = GUINT64_FROM_LE(d.g);
522 	return d.d;
523 }
/**
 * mdb_pg_get_double:
 * @mdb: Handle to open MDB database file
 * @offset: byte index into the current page buffer
 *
 * Reads an 8-byte double from the current page via mdb_get_double() and
 * advances the current position by 8.
 *
 * Return value: the decoded double, or -1 when the eight bytes would not lie
 * entirely inside the page (the position is not advanced in that case).
 */
double mdb_pg_get_double(MdbHandle *mdb, int offset)
{
	/* compare against pg_size - 8 instead of computing offset + 8, which
	 * would overflow for offsets close to INT_MAX and defeat the check */
	if (offset < 0 || offset > (int)mdb->fmt->pg_size - 8) return -1;
	mdb->cur_pos+=8;
	return mdb_get_double((char *)mdb->pg_buf, offset);
}
530 
531 
532 int
mdb_set_pos(MdbHandle * mdb,int pos)533 mdb_set_pos(MdbHandle *mdb, int pos)
534 {
535 	if (pos<0 || pos >= (int)mdb->fmt->pg_size) return 0;
536 
537 	mdb->cur_pos=pos;
538 	return pos;
539 }
/**
 * mdb_get_pos:
 * @mdb: Handle to open MDB database file
 *
 * Return value: the handle's current position within the page buffer.
 */
int mdb_get_pos(MdbHandle *mdb)
{
	return (int)mdb->cur_pos;
}
544