1 /* MDB Tools - A library for reading MS Access database files
2 * Copyright (C) 2000 Brian Bruns
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "mdbtools.h"
20
21 #ifdef DMALLOC
22 #include "dmalloc.h"
23 #endif
24
25 /*
26 typedef struct {
27 int pg_size;
28 guint16 row_count_offset;
29 guint16 tab_num_rows_offset;
30 guint16 tab_num_cols_offset;
31 guint16 tab_num_idxs_offset;
32 guint16 tab_num_ridxs_offset;
33 guint16 tab_usage_map_offset;
34 guint16 tab_first_dpg_offset;
35 guint16 tab_cols_start_offset;
36 guint16 tab_ridx_entry_size;
37 guint16 col_flags_offset;
38 guint16 col_size_offset;
39 guint16 col_num_offset;
40 guint16 tab_col_entry_size;
41 guint16 tab_free_map_offset;
42 guint16 tab_col_offset_var;
43 guint16 tab_col_offset_fixed;
44 guint16 tab_row_col_num_offset;
45 } MdbFormatConstants;
46 */
/*
 * Page-layout constants for Jet4 files; mdb_open() selects this table for
 * MDB_VER_JET4, MDB_VER_ACCDB_2007 and MDB_VER_ACCDB_2010.
 * The initialiser is positional: values appear in the field order of the
 * MdbFormatConstants layout reproduced in the comment above, starting with
 * pg_size (4096).
 * NOTE(review): confirm that comment still matches the real struct in
 * mdbtools.h before adding or reordering fields.
 */
MdbFormatConstants MdbJet4Constants = {
4096, 0x0c, 16, 45, 47, 51, 55, 56, 63, 12, 15, 23, 5, 25, 59, 7, 21, 9
};
/*
 * Page-layout constants for Jet3 files; mdb_open() selects this table for
 * MDB_VER_JET3 and also uses it as the bootstrap layout before page 0 has
 * been read.
 * The initialiser is positional: values appear in the field order of the
 * MdbFormatConstants layout reproduced in the comment above, starting with
 * pg_size (2048).
 * NOTE(review): confirm that comment still matches the real struct in
 * mdbtools.h before adding or reordering fields.
 */
MdbFormatConstants MdbJet3Constants = {
2048, 0x08, 12, 25, 27, 31, 35, 36, 43, 8, 13, 16, 1, 18, 39, 3, 14, 5
};
53
/* RC4 cipher state: a 256-byte permutation plus the two stream indices. */
typedef struct _RC4_KEY
{
unsigned char state[256]; /* permutation table, initialised by RC4_set_key() */
unsigned char x; /* first stream index; zeroed by RC4_set_key(), advanced by RC4() */
unsigned char y; /* second stream index; zeroed by RC4_set_key(), advanced by RC4() */
} RC4_KEY;
60
/*
 * Exchange the bytes pointed to by x and y.
 *
 * Uses an `unsigned char t` that must be declared in the calling scope as
 * scratch space, and evaluates each argument twice, so arguments must be
 * free of side effects.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement; the previous bare three-statement form silently broke (or failed
 * to compile) when used as the body of an unbraced if/else or loop.
 */
#define swap_byte(x,y) do { t = *(x); *(x) = *(y); *(y) = t; } while (0)
62
/* Forward declaration: defined further down, called by mdb_read_pg() and mdb_read_alt_pg(). */
static ssize_t _mdb_read_pg(MdbHandle *mdb, void *pg_buf, unsigned long pg);
64
/*
 * RC4 key schedule.
 *
 * Resets key->state to the identity permutation, zeroes both stream indices,
 * then mixes key_data_len bytes of key material from key_data_ptr into the
 * permutation (the key is reused cyclically over the 256 mixing rounds).
 *
 * key:           state to initialise
 * key_data_len:  number of key bytes available at key_data_ptr
 * key_data_ptr:  key material
 *
 * A NULL key pointer or a non-positive length would mean a NULL dereference
 * or a modulo by zero in the mixing loop, so in that case the state is left
 * as the identity permutation and no mixing is done.
 */
static void RC4_set_key(RC4_KEY *key, int key_data_len, unsigned char *key_data_ptr)
{
    unsigned char *state = key->state;
    unsigned char key_pos = 0;
    unsigned char j = 0;
    int i;

    for (i = 0; i < 256; i++)
        state[i] = (unsigned char)i;
    key->x = 0;
    key->y = 0;

    if (key_data_ptr == NULL || key_data_len <= 0)
        return;

    for (i = 0; i < 256; i++) {
        unsigned char tmp;

        /* all index arithmetic is modulo 256 via the unsigned char conversion */
        j = (unsigned char)(key_data_ptr[key_pos] + state[i] + j);

        tmp = state[i];
        state[i] = state[j];
        state[j] = tmp;

        key_pos = (unsigned char)((key_pos + 1) % key_data_len);
    }
}
86
87 /*
88 * this algorithm does 'encrypt in place' instead of inbuff/outbuff
89 * note also: encryption and decryption use same routine
90 * implementation supplied by (Adam Back) at <adam at cypherspace dot org>
91 */
92
RC4(RC4_KEY * key,int buffer_len,unsigned char * buff)93 static void RC4(RC4_KEY *key, int buffer_len, unsigned char * buff)
94 {
95 unsigned char t;
96 unsigned char x;
97 unsigned char y;
98 unsigned char* state;
99 unsigned char xorIndex;
100 short counter;
101
102 x = key->x;
103 y = key->y;
104 state = &key->state[0];
105 for(counter = 0; counter < buffer_len; counter++) {
106 x = (x + 1) % 256;
107 y = (state[x] + y) % 256;
108 swap_byte(&state[x], &state[y]);
109 xorIndex = (state[x] + state[y]) % 256;
110 buff[counter] ^= state[xorIndex];
111 }
112 key->x = x;
113 key->y = y;
114 }
115
116
117 /**
118 * mdb_find_file:
119 * @filename: path to MDB (database) file
120 *
121 * Finds and returns the absolute path to an MDB file. Function will first try
122 * to fstat file as passed, then search through the $MDBPATH if not found.
123 *
124 * Return value: gchar pointer to absolute path. Caller is responsible for
125 * freeing.
126 **/
127
mdb_find_file(const char * file_name)128 static char *mdb_find_file(const char *file_name)
129 {
130 struct stat status;
131 gchar *mdbpath, **dir, *tmpfname;
132 unsigned int i = 0;
133
134 /* try the provided file name first */
135 if (!stat(file_name, &status)) {
136 char *result;
137 result = g_strdup(file_name);
138 if (!result)
139 fprintf(stderr, "Can't alloc filename\n");
140 return result;
141 }
142
143 /* Now pull apart $MDBPATH and try those */
144 mdbpath = (gchar *) getenv("MDBPATH");
145 /* no path, can't find file */
146 if (!mdbpath || !strlen(mdbpath)) return NULL;
147
148 dir = g_strsplit(mdbpath, ":", 0);
149 while (dir[i]) {
150 if (!strlen(dir[i])) continue;
151 tmpfname = g_strconcat(dir[i++], "/", file_name, NULL);
152 if (!stat(tmpfname, &status)) {
153 g_strfreev(dir);
154 return tmpfname;
155 }
156 g_free(tmpfname);
157 }
158 g_strfreev(dir);
159 return NULL;
160 }
161
162 /**
163 * mdb_set_encoding:
164 * @mdb: Handle to MDB database file
165 * @encoding_name: encoding name for MDB (database) file in JET3 version.
166 * A copy of the string will be created.
167 *
168 * Sets encoding name for MDB (database) file in JET3 version.
169 * JET3 databases have no usincode support but only ANSI code page (e.g. CP1252)
170 * (not ISO), so you need to decide what code page strings in the MDB file are encoded in.
171 *
172 * Use this function after mdb_open()) but BEFORE any operation which reads text strings
173 * from the MDB file.
174 * "MDB_JET3_CHARSET" environment variable has priority over this setting.
175 *
176 **/
mdb_set_encoding(MdbHandle * mdb,const char * encoding_name)177 void mdb_set_encoding(MdbHandle *mdb, const char *encoding_name)
178 {
179 #ifdef HAVE_ICONV
180 mdb_iconv_close(mdb);
181 g_free(mdb->jet3_iconv_code);
182 mdb->jet3_iconv_code = g_strdup(encoding_name);
183 mdb_iconv_init(mdb);
184 #endif
185 }
186
187 /**
188 * mdb_open:
189 * @filename: path to MDB (database) file
190 * @flags: MDB_NOFLAGS for read-only, MDB_WRITABLE for read/write
191 *
192 * Opens an MDB file and returns an MdbHandle to it. MDB File may be relative
193 * to the current directory, a full path to the file, or relative to a
194 * component of $MDBPATH.
195 *
196 * Return value: pointer to MdbHandle structure.
197 **/
mdb_open(const char * filename,MdbFileFlags flags)198 MdbHandle *mdb_open(const char *filename, MdbFileFlags flags)
199 {
200 MdbHandle *mdb;
201 int key[] = {0x86, 0xfb, 0xec, 0x37, 0x5d, 0x44, 0x9c, 0xfa, 0xc6, 0x5e, 0x28, 0xe6, 0x13, 0xb6};
202 int j, pos;
203 int open_flags;
204
205 mdb = (MdbHandle *) g_malloc0(sizeof(MdbHandle));
206 #if !MDB_NO_BACKENDS
207 mdb_set_default_backend(mdb, "access");
208 #endif
209 #ifdef HAVE_ICONV
210 mdb->jet3_iconv_code = 0;
211 mdb->iconv_in = (iconv_t)-1;
212 mdb->iconv_out = (iconv_t)-1;
213 #endif
214 /* need something to bootstrap with, reassign after page 0 is read */
215 mdb->fmt = &MdbJet3Constants;
216 mdb->f = (MdbFile *) g_malloc0(sizeof(MdbFile));
217 mdb->f->refs = 1;
218 mdb->f->fd = -1;
219 mdb->f->filename = mdb_find_file(filename);
220 if (!mdb->f->filename) {
221 fprintf(stderr, "File not found\n");
222 mdb_close(mdb);
223 return NULL;
224 }
225 if (flags & MDB_WRITABLE) {
226 mdb->f->writable = TRUE;
227 open_flags = O_RDWR;
228 } else {
229 open_flags = O_RDONLY;
230 }
231
232 #ifdef _WIN32
233 open_flags |= O_BINARY;
234 #endif
235
236 mdb->f->fd = open(mdb->f->filename, open_flags);
237
238 if (mdb->f->fd==-1) {
239 fprintf(stderr,"Couldn't open file %s\n",mdb->f->filename);
240 mdb_close(mdb);
241 return NULL;
242 }
243 if (!mdb_read_pg(mdb, 0)) {
244 fprintf(stderr,"Couldn't read first page.\n");
245 mdb_close(mdb);
246 return NULL;
247 }
248 if (mdb->pg_buf[0] != 0) {
249 mdb_close(mdb);
250 return NULL;
251 }
252 mdb->f->jet_version = mdb_get_int32((char *)mdb->pg_buf, 0x14);
253 switch(mdb->f->jet_version) {
254 case MDB_VER_JET3:
255 mdb->fmt = &MdbJet3Constants;
256 break;
257 case MDB_VER_JET4:
258 case MDB_VER_ACCDB_2007:
259 case MDB_VER_ACCDB_2010:
260 mdb->fmt = &MdbJet4Constants;
261 break;
262 default:
263 fprintf(stderr,"Unknown Jet version.\n");
264 mdb_close(mdb);
265 return NULL;
266 }
267 mdb->f->db_key = mdb_get_int32((char *)mdb->pg_buf, 0x3e);
268 /* I don't know if this value is valid for some versions?
269 * it doesn't seem to be valid for the databases I have
270 *
271 * f->db_key ^= 0xe15e01b9;
272 */
273 mdb->f->db_key ^= 0x4ebc8afb;
274 /* fprintf(stderr, "Encrypted file, RC4 key seed= %d\n", mdb->f->db_key); */
275 if (mdb->f->db_key) {
276 /* write is not supported for encrypted files yet */
277 mdb->f->writable = FALSE;
278 /* that should be enough, but reopen the file read only just to be
279 * sure we don't write invalid data */
280 close(mdb->f->fd);
281 open_flags = O_RDONLY;
282 #ifdef _WIN32
283 open_flags |= O_BINARY;
284 #endif
285 mdb->f->fd = open(mdb->f->filename, open_flags);
286 if (mdb->f->fd==-1) {
287 fprintf(stderr, "Couldn't ropen file %s in read only\n", mdb->f->filename);
288 mdb_close(mdb);
289 return NULL;
290 }
291 }
292
293 /* get the db password located at 0x42 bytes into the file */
294 for (pos=0;pos<14;pos++) {
295 j = mdb_get_int32((char *)mdb->pg_buf, 0x42+pos);
296 j ^= key[pos];
297 if ( j != 0)
298 mdb->f->db_passwd[pos] = j;
299 else
300 mdb->f->db_passwd[pos] = '\0';
301 }
302
303 mdb_iconv_init(mdb);
304
305 return mdb;
306 }
307
308 /**
309 * mdb_close:
310 * @mdb: Handle to open MDB database file
311 *
312 * Dereferences MDB file, closes if reference count is 0, and destroys handle.
313 *
314 **/
315 void
mdb_close(MdbHandle * mdb)316 mdb_close(MdbHandle *mdb)
317 {
318 if (!mdb) return;
319 mdb_free_catalog(mdb);
320 #if !MDB_NO_STATS
321 g_free(mdb->stats);
322 #endif
323 #if !MDB_NO_BACKENDS
324 g_free(mdb->backend_name);
325 #endif
326 if (mdb->f) {
327 if (mdb->f->refs > 1) {
328 mdb->f->refs--;
329 } else {
330 if (mdb->f->fd != -1) close(mdb->f->fd);
331 g_free(mdb->f->filename);
332 g_free(mdb->f);
333 }
334 }
335
336 mdb_iconv_close(mdb);
337
338 #ifdef HAVE_ICONV
339 g_free(mdb->jet3_iconv_code);
340 #endif
341 g_free(mdb);
342 }
343 /**
344 * mdb_clone_handle:
345 * @mdb: Handle to open MDB database file
346 *
347 * Clones an existing database handle. Cloned handle shares the file descriptor
348 * but has its own page buffer, page position, and similar internal variables.
349 *
350 * Return value: new handle to the database.
351 */
mdb_clone_handle(MdbHandle * mdb)352 MdbHandle *mdb_clone_handle(MdbHandle *mdb)
353 {
354 MdbHandle *newmdb;
355 MdbCatalogEntry *entry, *data;
356 unsigned int i;
357
358 newmdb = (MdbHandle *) g_memdup(mdb, sizeof(MdbHandle));
359 #if !MDB_NO_STATS
360 newmdb->stats = NULL;
361 #endif
362 newmdb->catalog = g_ptr_array_new();
363 for (i=0;i<mdb->num_catalog;i++) {
364 entry = g_ptr_array_index(mdb->catalog,i);
365 data = g_memdup(entry,sizeof(MdbCatalogEntry));
366 g_ptr_array_add(newmdb->catalog, data);
367 }
368 #if !MDB_NO_BACKENDS
369 newmdb->backend_name = NULL;
370 #endif
371 if (mdb->f) {
372 mdb->f->refs++;
373 }
374 #ifdef HAVE_ICONV
375 newmdb->jet3_iconv_code = g_strdup(mdb->jet3_iconv_code);
376 #endif
377 mdb_iconv_init(newmdb);
378
379 return newmdb;
380 }
381
382 /*
383 ** mdb_read a wrapper for read that bails if anything is wrong
384 */
mdb_read_pg(MdbHandle * mdb,unsigned long pg)385 ssize_t mdb_read_pg(MdbHandle *mdb, unsigned long pg)
386 {
387 ssize_t len;
388
389 if (pg && mdb->cur_pg == pg) return mdb->fmt->pg_size;
390
391 len = _mdb_read_pg(mdb, mdb->pg_buf, pg);
392 //fprintf(stderr, "read page %d type %02x\n", pg, mdb->pg_buf[0]);
393 mdb->cur_pg = pg;
394 /* kan - reset the cur_pos on a new page read */
395 mdb->cur_pos = 0; /* kan */
396 return len;
397 }
/*
 * Read page pg into mdb->alt_pg_buf. Unlike mdb_read_pg() this neither
 * consults nor updates mdb->cur_pg / mdb->cur_pos.
 * Returns whatever _mdb_read_pg() returns: bytes read, or 0 on failure.
 */
ssize_t mdb_read_alt_pg(MdbHandle *mdb, unsigned long pg)
{
    return _mdb_read_pg(mdb, mdb->alt_pg_buf, pg);
}
/*
 * Read one page from the database file into pg_buf, decrypting it when the
 * file has a non-zero RC4 key seed.
 *
 * mdb:     handle; supplies the file descriptor, page size and key seed
 * pg_buf:  destination, must hold at least mdb->fmt->pg_size bytes
 * pg:      page number; the file offset is pg * page size
 *
 * Returns the number of bytes read (a full page) on success. Returns 0 if
 * fstat/lseek/read fails, if the offset lies beyond the end of the file, or
 * if fewer bytes than a full page could be read.
 */
static ssize_t _mdb_read_pg(MdbHandle *mdb, void *pg_buf, unsigned long pg)
{
    ssize_t len;
    struct stat status;
    off_t offset = pg * mdb->fmt->pg_size;

    if (fstat(mdb->f->fd, &status) != 0) {
        perror("fstat");
        return 0;
    }
    if (status.st_size < offset) {
        /*
         * off_t has no printf length modifier of its own; convert explicitly
         * so the argument always matches the conversion specifier.
         */
        fprintf(stderr,"offset %lld is beyond EOF\n",(long long)offset);
        return 0;
    }
#if !MDB_NO_STATS
    if (mdb->stats && mdb->stats->collect)
        mdb->stats->pg_reads++;
#endif
    if (lseek(mdb->f->fd, offset, SEEK_SET) == -1) {
        perror("lseek");
        return 0;
    }
    len = read(mdb->f->fd,pg_buf,mdb->fmt->pg_size);
    if (len==-1) {
        perror("read");
        return 0;
    }
    else if (len<mdb->fmt->pg_size) {
        /* short read: end of file reached before a full page */
        return 0;
    }
    /*
     * unencrypt the page if necessary.
     * it might make sense to cache the unencrypted data blocks?
     * Page 0 is never decrypted; every other page uses the key seed XORed
     * with its page number.
     */
    if (pg != 0 && mdb->f->db_key != 0)
    {
        RC4_KEY rc4_key;
        unsigned int tmp_key = mdb->f->db_key ^ pg;
        unsigned char key_bytes[4];

        /*
         * Spell the 32-bit key out least-significant byte first so the RC4
         * key is identical on little- and big-endian hosts (passing the
         * address of the integer only worked on little-endian machines).
         */
        key_bytes[0] = (unsigned char)(tmp_key & 0xff);
        key_bytes[1] = (unsigned char)((tmp_key >> 8) & 0xff);
        key_bytes[2] = (unsigned char)((tmp_key >> 16) & 0xff);
        key_bytes[3] = (unsigned char)((tmp_key >> 24) & 0xff);

        RC4_set_key(&rc4_key, 4, key_bytes);
        RC4(&rc4_key, mdb->fmt->pg_size, pg_buf);
    }

    return len;
}
/*
 * Exchange the contents of mdb->pg_buf and mdb->alt_pg_buf by copying
 * MDB_PGSIZE bytes through a temporary buffer on the stack.
 */
void mdb_swap_pgbuf(MdbHandle *mdb)
{
    char saved[MDB_PGSIZE];

    memcpy(saved, mdb->pg_buf, sizeof saved);
    memcpy(mdb->pg_buf, mdb->alt_pg_buf, sizeof saved);
    memcpy(mdb->alt_pg_buf, saved, sizeof saved);
}
458
459
/*
 * Return the byte stored offset bytes into buf, as an unsigned value.
 * No bounds checking is performed.
 */
unsigned char mdb_get_byte(void *buf, int offset)
{
    const unsigned char *bytes = buf;

    return *(bytes + offset);
}
/*
 * Return the byte at offset within the current page buffer and advance
 * mdb->cur_pos by one.
 *
 * If offset is outside the page, -1 is returned; because the return type is
 * unsigned char the caller sees 255, which cannot be told apart from a
 * genuine 0xFF byte. cur_pos is left unchanged in that case.
 */
unsigned char mdb_pg_get_byte(MdbHandle *mdb, int offset)
{
    /* compare against size - 1 instead of offset + 1, which overflows for INT_MAX */
    if (offset < 0 || offset > (int)mdb->fmt->pg_size - 1) return -1;
    mdb->cur_pos++;
    return mdb->pg_buf[offset];
}
470
mdb_get_int16(char * buf,int offset)471 int mdb_get_int16(char *buf, int offset)
472 {
473 guint16 l;
474 memcpy(&l, buf + offset, 2);
475 return (int)GUINT16_FROM_LE(l);
476 }
/*
 * Decode the 16-bit little-endian value at offset within the current page
 * buffer (see mdb_get_int16) and advance mdb->cur_pos by two.
 *
 * Returns -1, leaving cur_pos unchanged, if the two bytes would not lie
 * entirely inside the page.
 */
int mdb_pg_get_int16(MdbHandle *mdb, int offset)
{
    /* compare against size - 2 instead of offset + 2, which overflows near INT_MAX */
    if (offset < 0 || offset > (int)mdb->fmt->pg_size - 2) return -1;
    mdb->cur_pos+=2;
    return mdb_get_int16((char *)mdb->pg_buf, offset);
}
483
mdb_get_int32_msb(char * buf,int offset)484 long mdb_get_int32_msb(char *buf, int offset)
485 {
486 gint32 l;
487 memcpy(&l, buf + offset, 4);
488 return (long)GINT32_FROM_BE(l);
489 }
mdb_get_int32(char * buf,int offset)490 long mdb_get_int32(char *buf, int offset)
491 {
492 gint32 l;
493 memcpy(&l, buf + offset, 4);
494 return (long)GINT32_FROM_LE(l);
495 }
/*
 * Decode the 32-bit little-endian value at offset within the current page
 * buffer (see mdb_get_int32) and advance mdb->cur_pos by four.
 *
 * Returns -1, leaving cur_pos unchanged, if the four bytes would not lie
 * entirely inside the page; note that -1 is also a valid decoded value.
 */
long mdb_pg_get_int32(MdbHandle *mdb, int offset)
{
    /* compare against size - 4 instead of offset + 4, which overflows near INT_MAX */
    if (offset < 0 || offset > (int)mdb->fmt->pg_size - 4) return -1;
    mdb->cur_pos+=4;
    return mdb_get_int32((char *)mdb->pg_buf, offset);
}
502
mdb_get_single(char * buf,int offset)503 float mdb_get_single(char *buf, int offset)
504 {
505 union {guint32 g; float f;} f;
506 memcpy(&f, buf + offset, 4);
507 f.g = GUINT32_FROM_LE(f.g);
508 return f.f;
509 }
/*
 * Decode the float stored at offset within the current page buffer (see
 * mdb_get_single) and advance mdb->cur_pos by four.
 *
 * Returns -1, leaving cur_pos unchanged, if the four bytes would not lie
 * entirely inside the page; note that -1 is also a valid decoded value.
 */
float mdb_pg_get_single(MdbHandle *mdb, int offset)
{
    /* compare against size - 4 instead of offset + 4, which overflows near INT_MAX */
    if (offset < 0 || offset > (int)mdb->fmt->pg_size - 4) return -1;
    mdb->cur_pos+=4;
    return mdb_get_single((char *)mdb->pg_buf, offset);
}
516
mdb_get_double(char * buf,int offset)517 double mdb_get_double(char *buf, int offset)
518 {
519 union {guint64 g; double d;} d;
520 memcpy(&d, buf + offset, 8);
521 d.g = GUINT64_FROM_LE(d.g);
522 return d.d;
523 }
/*
 * Decode the double stored at offset within the current page buffer (see
 * mdb_get_double) and advance mdb->cur_pos by eight.
 *
 * Returns -1, leaving cur_pos unchanged, if the eight bytes would not lie
 * entirely inside the page; note that -1 is also a valid decoded value.
 */
double mdb_pg_get_double(MdbHandle *mdb, int offset)
{
    /* compare against size - 8 instead of offset + 8, which overflows near INT_MAX */
    if (offset < 0 || offset > (int)mdb->fmt->pg_size - 8) return -1;
    mdb->cur_pos+=8;
    return mdb_get_double((char *)mdb->pg_buf, offset);
}
530
531
532 int
mdb_set_pos(MdbHandle * mdb,int pos)533 mdb_set_pos(MdbHandle *mdb, int pos)
534 {
535 if (pos<0 || pos >= (int)mdb->fmt->pg_size) return 0;
536
537 mdb->cur_pos=pos;
538 return pos;
539 }
/* Return the handle's current position (mdb->cur_pos) within the page buffer. */
int mdb_get_pos(MdbHandle *mdb)
{
    int pos = mdb->cur_pos;

    return pos;
}
544