1 /* MDB Tools - A library for reading MS Access database files
2  * Copyright (C) 2000 Brian Bruns
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA  02110-1301, USA.
18  */
19 
20 #include "mdbtools.h"
21 #include "errno.h"
22 
23 #ifdef DMALLOC
24 #include "dmalloc.h"
25 #endif
26 
27 /*
28  * This function is used in reading text data from an MDB table.
29  */
30 int
mdb_unicode2ascii(MdbHandle * mdb,unsigned char * src,unsigned int slen,unsigned char * dest,unsigned int dlen)31 mdb_unicode2ascii(MdbHandle *mdb, unsigned char *src, unsigned int slen, unsigned char *dest, unsigned int dlen)
32 {
33 	unsigned char *tmp = NULL;
34 	size_t tlen = 0;
35 	size_t len_in, len_out;
36 	char *in_ptr, *out_ptr;
37 
38 	if ((!src) || (!dest))
39 		return 0;
40 
41 	/* Uncompress 'Unicode Compressed' string into tmp */
42 	if (IS_JET4(mdb) && (slen>=2) && (src[0]==0xff) && (src[1]==0xfe)) {
43 		unsigned int compress=1;
44 		src += 2;
45 		slen -= 2;
46 		tmp = (unsigned char *)g_malloc(slen*2);
47 		while (slen) {
48 			if (*src == 0) {
49 				compress = (compress) ? 0 : 1;
50 				src++;
51 				slen--;
52 			} else if (compress) {
53 				tmp[tlen++] = *src++;
54 				tmp[tlen++] = 0;
55 				slen--;
56 			} else if (slen >= 2){
57 				tmp[tlen++] = *src++;
58 				tmp[tlen++] = *src++;
59 				slen-=2;
60 			}
61 		}
62 	}
63 
64 	in_ptr = (char *)((tmp) ? tmp : src);
65 	out_ptr = (char *)dest;
66 	len_in = (tmp) ? tlen : slen;
67 	len_out = dlen;
68 
69 #if HAVE_ICONV
70 	//printf("1 len_in %d len_out %d\n",len_in, len_out);
71 	while (1) {
72 		iconv(mdb->iconv_in, &in_ptr, &len_in, &out_ptr, &len_out);
73 		if ((!len_in) || (errno == E2BIG)) break;
74 		/* Don't bail if impossible conversion is encountered */
75 		in_ptr += (IS_JET4(mdb)) ? 2 : 1;
76 		len_in -= (IS_JET4(mdb)) ? 2 : 1;
77 		*out_ptr++ = '?';
78 		len_out--;
79 	}
80 	//printf("2 len_in %d len_out %d\n",len_in, len_out);
81 	dlen -= len_out;
82 #else
83 	if (IS_JET3(mdb)) {
84 		strncpy(out_ptr, in_ptr, len_in);
85 		dlen = len_in;
86 	} else {
87 		/* rough UCS-2LE to ISO-8859-1 conversion */
88 		unsigned int i;
89 		for (i=0; i<len_in; i+=2)
90 			dest[i/2] = (in_ptr[i+1] == 0) ? in_ptr[i] : '?';
91 		dlen = len_in/2;
92 	}
93 #endif
94 
95 	if (tmp) g_free(tmp);
96 	dest[dlen]='\0';
97 	//printf("dest %s\n",dest);
98 	return dlen;
99 }
100 
101 /*
102  * This function is used in writing text data to an MDB table.
103  * If slen is 0, strlen will be used to calculate src's length.
104  */
105 int
mdb_ascii2unicode(MdbHandle * mdb,unsigned char * src,unsigned int slen,unsigned char * dest,unsigned int dlen)106 mdb_ascii2unicode(MdbHandle *mdb, unsigned char *src, unsigned int slen, unsigned char *dest, unsigned int dlen)
107 {
108         size_t len_in, len_out;
109         char *in_ptr, *out_ptr;
110 
111 	if ((!src) || (!dest))
112 		return 0;
113 
114         in_ptr = (char *)src;
115         out_ptr = (char *)dest;
116         len_in = (slen) ? slen : strlen(in_ptr);
117         len_out = dlen;
118 
119 #ifdef HAVE_ICONV
120 	iconv(mdb->iconv_out, &in_ptr, &len_in, &out_ptr, &len_out);
121 	//printf("len_in %d len_out %d\n", len_in, len_out);
122 	dlen -= len_out;
123 #else
124 	if (IS_JET3(mdb)) {
125 		dlen = MIN(len_in, len_out);
126 		strncpy(out_ptr, in_ptr, dlen);
127 	} else {
128 		unsigned int i;
129 		slen = MIN(len_in, len_out/2);
130 		dlen = slen*2;
131 		for (i=0; i<slen; i++) {
132 			out_ptr[i*2] = in_ptr[i];
133 			out_ptr[i*2+1] = 0;
134 		}
135 	}
136 #endif
137 
138 	/* Unicode Compression */
139 	if(IS_JET4(mdb) && (dlen>4)) {
140 		unsigned char *tmp = g_malloc(dlen);
141 		unsigned int tptr = 0, dptr = 0;
142 		int comp = 1;
143 
144 		tmp[tptr++] = 0xff;
145 		tmp[tptr++] = 0xfe;
146 		while((dptr < dlen) && (tptr < dlen)) {
147 			if (((dest[dptr+1]==0) && (comp==0))
148 			 || ((dest[dptr+1]!=0) && (comp==1))) {
149 				/* switch encoding mode */
150 				tmp[tptr++] = 0;
151 				comp = (comp) ? 0 : 1;
152 			} else if (dest[dptr]==0) {
153 				/* this string cannot be compressed */
154 				tptr = dlen;
155 			} else if (comp==1) {
156 				/* encode compressed character */
157 				tmp[tptr++] = dest[dptr];
158 				dptr += 2;
159 			} else if (tptr+1 < dlen) {
160 				/* encode uncompressed character */
161 				tmp[tptr++] = dest[dptr];
162 				tmp[tptr++] = dest[dptr+1];
163 				dptr += 2;
164 			} else {
165 				/* could not encode uncompressed character
166 				 * into single byte */
167 				tptr = dlen;
168 			}
169 		}
170 		if (tptr < dlen) {
171 			memcpy(dest, tmp, tptr);
172 			dlen = tptr;
173 		}
174 		g_free(tmp);
175 	}
176 
177 	return dlen;
178 }
179 
/*
 * Opens the two iconv descriptors kept in the handle: iconv_in converts
 * from the database encoding to the client encoding, and iconv_out
 * converts the other way.
 *
 * The client encoding is taken from the MDB_ICONV environment variable,
 * falling back to "UTF-8".  The database encoding is "UCS-2LE" for JET4;
 * otherwise it comes from MDB_JET3_CHARSET, falling back to "CP1252".
 *
 * Without HAVE_ICONV the handle is left untouched.
 */
void mdb_iconv_init(MdbHandle *mdb)
{
	/* check environment variable */
	const char *client_charset = getenv("MDB_ICONV");

	if (client_charset == NULL) {
		client_charset = "UTF-8";
	}

#ifdef HAVE_ICONV
	if (!(IS_JET4(mdb))) {
		/* According to Microsoft Knowledge Base pages 289525 and */
		/* 202427, code page info is not contained in the database, */
		/* so check the environment variable for it */
		const char *jet3_charset = getenv("MDB_JET3_CHARSET");

		if (jet3_charset == NULL) {
			jet3_charset = "CP1252";
		}

		mdb->iconv_out = iconv_open(jet3_charset, client_charset);
		mdb->iconv_in = iconv_open(client_charset, jet3_charset);
		return;
	}

	mdb->iconv_out = iconv_open("UCS-2LE", client_charset);
	mdb->iconv_in = iconv_open(client_charset, "UCS-2LE");
#endif
}
/*
 * Closes the iconv descriptors held by the handle.  A descriptor equal to
 * (iconv_t)-1 is skipped.  Without HAVE_ICONV this function does nothing.
 */
void mdb_iconv_close(MdbHandle *mdb)
{
#ifdef HAVE_ICONV
	const iconv_t not_open = (iconv_t)-1;

	if (mdb->iconv_out != not_open) {
		iconv_close(mdb->iconv_out);
	}
	if (mdb->iconv_in != not_open) {
		iconv_close(mdb->iconv_in);
	}
#endif
}
215