1 /* MDB Tools - A library for reading MS Access database files
2 * Copyright (C) 2000 Brian Bruns
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 #include "mdbtools.h"
21 #include "errno.h"
22
23 #ifdef DMALLOC
24 #include "dmalloc.h"
25 #endif
26
27 /*
28 * This function is used in reading text data from an MDB table.
29 */
30 int
mdb_unicode2ascii(MdbHandle * mdb,unsigned char * src,unsigned int slen,unsigned char * dest,unsigned int dlen)31 mdb_unicode2ascii(MdbHandle *mdb, unsigned char *src, unsigned int slen, unsigned char *dest, unsigned int dlen)
32 {
33 unsigned char *tmp = NULL;
34 size_t tlen = 0;
35 size_t len_in, len_out;
36 char *in_ptr, *out_ptr;
37
38 if ((!src) || (!dest))
39 return 0;
40
41 /* Uncompress 'Unicode Compressed' string into tmp */
42 if (IS_JET4(mdb) && (slen>=2) && (src[0]==0xff) && (src[1]==0xfe)) {
43 unsigned int compress=1;
44 src += 2;
45 slen -= 2;
46 tmp = (unsigned char *)g_malloc(slen*2);
47 while (slen) {
48 if (*src == 0) {
49 compress = (compress) ? 0 : 1;
50 src++;
51 slen--;
52 } else if (compress) {
53 tmp[tlen++] = *src++;
54 tmp[tlen++] = 0;
55 slen--;
56 } else if (slen >= 2){
57 tmp[tlen++] = *src++;
58 tmp[tlen++] = *src++;
59 slen-=2;
60 }
61 }
62 }
63
64 in_ptr = (char *)((tmp) ? tmp : src);
65 out_ptr = (char *)dest;
66 len_in = (tmp) ? tlen : slen;
67 len_out = dlen;
68
69 #if HAVE_ICONV
70 //printf("1 len_in %d len_out %d\n",len_in, len_out);
71 while (1) {
72 iconv(mdb->iconv_in, &in_ptr, &len_in, &out_ptr, &len_out);
73 if ((!len_in) || (errno == E2BIG)) break;
74 /* Don't bail if impossible conversion is encountered */
75 in_ptr += (IS_JET4(mdb)) ? 2 : 1;
76 len_in -= (IS_JET4(mdb)) ? 2 : 1;
77 *out_ptr++ = '?';
78 len_out--;
79 }
80 //printf("2 len_in %d len_out %d\n",len_in, len_out);
81 dlen -= len_out;
82 #else
83 if (IS_JET3(mdb)) {
84 strncpy(out_ptr, in_ptr, len_in);
85 dlen = len_in;
86 } else {
87 /* rough UCS-2LE to ISO-8859-1 conversion */
88 unsigned int i;
89 for (i=0; i<len_in; i+=2)
90 dest[i/2] = (in_ptr[i+1] == 0) ? in_ptr[i] : '?';
91 dlen = len_in/2;
92 }
93 #endif
94
95 if (tmp) g_free(tmp);
96 dest[dlen]='\0';
97 //printf("dest %s\n",dest);
98 return dlen;
99 }
100
101 /*
102 * This function is used in writing text data to an MDB table.
103 * If slen is 0, strlen will be used to calculate src's length.
104 */
105 int
mdb_ascii2unicode(MdbHandle * mdb,unsigned char * src,unsigned int slen,unsigned char * dest,unsigned int dlen)106 mdb_ascii2unicode(MdbHandle *mdb, unsigned char *src, unsigned int slen, unsigned char *dest, unsigned int dlen)
107 {
108 size_t len_in, len_out;
109 char *in_ptr, *out_ptr;
110
111 if ((!src) || (!dest))
112 return 0;
113
114 in_ptr = (char *)src;
115 out_ptr = (char *)dest;
116 len_in = (slen) ? slen : strlen(in_ptr);
117 len_out = dlen;
118
119 #ifdef HAVE_ICONV
120 iconv(mdb->iconv_out, &in_ptr, &len_in, &out_ptr, &len_out);
121 //printf("len_in %d len_out %d\n", len_in, len_out);
122 dlen -= len_out;
123 #else
124 if (IS_JET3(mdb)) {
125 dlen = MIN(len_in, len_out);
126 strncpy(out_ptr, in_ptr, dlen);
127 } else {
128 unsigned int i;
129 slen = MIN(len_in, len_out/2);
130 dlen = slen*2;
131 for (i=0; i<slen; i++) {
132 out_ptr[i*2] = in_ptr[i];
133 out_ptr[i*2+1] = 0;
134 }
135 }
136 #endif
137
138 /* Unicode Compression */
139 if(IS_JET4(mdb) && (dlen>4)) {
140 unsigned char *tmp = g_malloc(dlen);
141 unsigned int tptr = 0, dptr = 0;
142 int comp = 1;
143
144 tmp[tptr++] = 0xff;
145 tmp[tptr++] = 0xfe;
146 while((dptr < dlen) && (tptr < dlen)) {
147 if (((dest[dptr+1]==0) && (comp==0))
148 || ((dest[dptr+1]!=0) && (comp==1))) {
149 /* switch encoding mode */
150 tmp[tptr++] = 0;
151 comp = (comp) ? 0 : 1;
152 } else if (dest[dptr]==0) {
153 /* this string cannot be compressed */
154 tptr = dlen;
155 } else if (comp==1) {
156 /* encode compressed character */
157 tmp[tptr++] = dest[dptr];
158 dptr += 2;
159 } else if (tptr+1 < dlen) {
160 /* encode uncompressed character */
161 tmp[tptr++] = dest[dptr];
162 tmp[tptr++] = dest[dptr+1];
163 dptr += 2;
164 } else {
165 /* could not encode uncompressed character
166 * into single byte */
167 tptr = dlen;
168 }
169 }
170 if (tptr < dlen) {
171 memcpy(dest, tmp, tptr);
172 dlen = tptr;
173 }
174 g_free(tmp);
175 }
176
177 return dlen;
178 }
179
/*
 * Open the two iconv descriptors stored in the handle (only when built
 * with HAVE_ICONV).
 *
 * The client-side character set is taken from the MDB_ICONV environment
 * variable and falls back to "UTF-8". For JET4 the database side is
 * "UCS-2LE"; otherwise it is taken from MDB_JET3_CHARSET and falls back
 * to "CP1252". The results of iconv_open are stored unchecked.
 */
void mdb_iconv_init(MdbHandle *mdb)
{
	/* check environment variable */
	char *iconv_code = (char *)getenv("MDB_ICONV");

	if (iconv_code == NULL)
		iconv_code = "UTF-8";

#ifdef HAVE_ICONV
	if (IS_JET4(mdb)) {
		mdb->iconv_out = iconv_open("UCS-2LE", iconv_code);
		mdb->iconv_in = iconv_open(iconv_code, "UCS-2LE");
	} else {
		/* According to Microsoft Knowledge Base pages 289525 and */
		/* 202427, code page info is not contained in the database */

		/* check environment variable */
		char *jet3_iconv_code = (char *)getenv("MDB_JET3_CHARSET");

		if (jet3_iconv_code == NULL)
			jet3_iconv_code = "CP1252";

		mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code);
		mdb->iconv_in = iconv_open(iconv_code, jet3_iconv_code);
	}
#endif
}
/*
 * Close the handle's iconv descriptors (only when built with
 * HAVE_ICONV). A descriptor equal to (iconv_t)-1 is left alone.
 */
void mdb_iconv_close(MdbHandle *mdb)
{
#ifdef HAVE_ICONV
	const iconv_t unopened = (iconv_t)-1;

	if (mdb->iconv_out != unopened) {
		iconv_close(mdb->iconv_out);
	}
	if (mdb->iconv_in != unopened) {
		iconv_close(mdb->iconv_in);
	}
#endif
}
215