1 #include <gnumeric-config.h>
2 #include <glib/gi18n-lib.h>
3 #include <gnumeric.h>
4 #include <libgnumeric.h>
5 #include "xbase.h"
6
7 #include <gnm-format.h>
8 #include <gutils.h>
9 #include <sheet.h>
10 #include <goffice/goffice.h>
11
12 #include <string.h>
13 #include <gsf/gsf-input.h>
14 #include <gsf/gsf-utils.h>
15 #include <gsf/gsf-msole-utils.h>
16
/*
 * Debug tracing.  XBASE_DEBUG == 0 compiles all tracing out; otherwise
 * d (level, code) executes `code` only when XBASE_DEBUG is strictly
 * greater than `level`.
 */
#define XBASE_DEBUG 0
#if XBASE_DEBUG > 0
#define d(level, code)	do { if (XBASE_DEBUG > level) { code } } while (0)
#else
#define d(level, code)
#endif

/* One-letter type codes recognised in a field descriptor. */
static char const * const field_types = "CNLDMF?BGPYTI";

#if XBASE_DEBUG > 0
/* Descriptions for debug output, index-aligned with field_types. */
static char const * const field_type_descriptions [] = {
	"Character",
	"Number",
	"Logical",
	"Date",
	"Memo",
	"Floating point",
	"Character name variable",
	"Binary",
	"General",
	"Picture",
	"Currency",
	"DateTime",
	"Integer"
};
#endif
33
34
35 /**
36 * Newly allocated pointer to record, initialised as first in database.
37 */
38 XBrecord *
record_new(XBfile * file)39 record_new (XBfile *file)
40 {
41 XBrecord *ans = g_new (XBrecord, 1);
42 ans->file = file;
43 ans->row = 1;
44 /* ans->data = g_new (guint8, file->fieldlen); */
45 ans->data = (guint8 *) g_strnfill (file->fieldlen, '?'); /* FIXME : just for testing */
46 record_seek (ans, SEEK_SET, 1);
47 return ans;
48 }
49
50 /**
51 * Position record at requested row, and load raw data. Returns FALSE on
52 * invalid row, file error, or invalid whence (same values as in fseek).
53 */
54 gboolean
record_seek(XBrecord * record,int whence,gsf_off_t row)55 record_seek (XBrecord *record, int whence, gsf_off_t row)
56 {
57 gsf_off_t offset;
58 switch (whence) {
59 case SEEK_SET:
60 offset = row;
61 break;
62 case SEEK_CUR:
63 offset = record->row + row;
64 break;
65 case SEEK_END:
66 offset = record->file->records + 1 - row;
67 break;
68 default:
69 g_warning("record_seek: invalid whence (%d)", whence);
70 return FALSE;
71 }
72 if (offset < 1 || offset > (gsf_off_t)record->file->records)
73 return FALSE;
74 record->row = offset;
75 offset = (offset-1) * record->file->fieldlen + record->file->headerlen;
76 return !gsf_input_seek (record->file->input, offset, G_SEEK_SET) &&
77 gsf_input_read (record->file->input, record->file->fieldlen, record->data) != NULL;
78 }
79
80 /**
81 * Clear allocated space for record.
82 */
83 void
record_free(XBrecord * record)84 record_free (XBrecord *record)
85 {
86 g_free (record->data);
87 g_free (record);
88 }
89
90 /**
91 * Points to binary data for num'th field in record's data.
92 */
93 gchar *
record_get_field(XBrecord const * record,guint num)94 record_get_field (XBrecord const *record, guint num)
95 {
96 if (num >= record->file->fields)
97 return NULL;
98 return (gchar *)record->data + record->file->format[num]->pos + 1;
99 }
100
101 gboolean
record_deleted(XBrecord * record)102 record_deleted (XBrecord *record)
103 {
104 return record->data[0] == 0x2a;
105 }
106
107 static void
xbase_read_header(XBfile * x,GOErrorInfo ** ret_error)108 xbase_read_header (XBfile *x, GOErrorInfo **ret_error)
109 {
110 static struct {
111 guint8 const id;
112 int const codepage;
113 char const *const name;
114 } const codepages [] = {
115 { 0x01, 437, "U.S. MS-DOS" },
116 { 0x02, 850, "International MS-DOS" },
117 { 0x03, 1252, "Windows ANSI" },
118 { 0x04, 10000, "Standard Macintosh" },
119 { 0x08, 865, "Danish OEM" },
120 { 0x09, 437, "Dutch OEM" },
121 { 0x0A, 850, "Dutch OEM*" },
122 { 0x0B, 437, "Finnish OEM" },
123 { 0x0D, 437, "French OEM" },
124 { 0x0E, 850, "French OEM*" },
125 { 0x0F, 437, "German OEM" },
126 { 0x10, 850, "German OEM*" },
127 { 0x11, 437, "Italian OEM" },
128 { 0x12, 850, "Italian OEM*" },
129 { 0x13, 932, "Japanese Shift-JIS" },
130 { 0x14, 850, "Spanish OEM*" },
131 { 0x15, 437, "Swedish OEM" },
132 { 0x16, 850, "Swedish OEM*" },
133 { 0x17, 865, "Norwegian OEM" },
134 { 0x18, 437, "Spanish OEM" },
135 { 0x19, 437, "English OEM (Britain)" },
136 { 0x1A, 850, "English OEM (Britain)*" },
137 { 0x1B, 437, "English OEM (U.S.)" },
138 { 0x1C, 863, "French OEM (Canada)" },
139 { 0x1D, 850, "French OEM*" },
140 { 0x1F, 852, "Czech OEM" },
141 { 0x22, 852, "Hungarian OEM" },
142 { 0x23, 852, "Polish OEM" },
143 { 0x24, 860, "Portugese OEM" },
144 { 0x25, 850, "Potugese OEM*" },
145 { 0x26, 866, "Russian OEM" },
146 { 0x37, 850, "English OEM (U.S.)*" },
147 { 0x40, 852, "Romanian OEM" },
148 { 0x4D, 936, "Chinese GBK (PRC)" },
149 { 0x4E, 949, "Korean (ANSI/OEM)" },
150 { 0x4F, 950, "Chinese Big 5 (Taiwan)" },
151 { 0x50, 874, "Thai (ANSI/OEM)" },
152 { 0x57, 1252, "Windows ANSI" }, /* guess */
153 { 0x58, 1252, "Western European ANSI" },
154 { 0x59, 1252, "Spanish ANSI" },
155 { 0x64, 852, "Eastern European MS-DOS" },
156 { 0x65, 866, "Russian MS-DOS" },
157 { 0x66, 865, "Nordic MS-DOS" },
158 { 0x67, 861, "Icelandic MS-DOS" },
159 { 0x68, 895, "Kamenicky (Czech) MS-DOS" },
160 { 0x69, 620, "Mazovia (Polish) MS-DOS" },
161 { 0x6A, 737, "Greek MS-DOS (437G)" },
162 { 0x6B, 857, "Turkish MS-DOS" },
163 { 0x6C, 863, "French-Canadian MS-DOS" },
164 { 0x78, 950, "Chinese (Hong Kong SAR, Taiwan) Windows" },
165 { 0x79, 949, "Korean Windows" },
166 { 0x7A, 936, "Chinese (PRC, Singapore) Windows" },
167 { 0x7B, 932, "Japanese Windows" },
168 { 0x7C, 874, "Thai Windows" },
169 { 0x7D, 1255, "Hebrew Windows" },
170 { 0x7E, 1256, "Arabic Windows" },
171 { 0x86, 737, "Greek OEM" },
172 { 0x87, 852, "Slovenian OEM" },
173 { 0x88, 857, "Turkish OEM" },
174 { 0x96, 10007, "Russian Macintosh" },
175 { 0x97, 10029, "Macintosh EE" },
176 { 0x98, 10006, "Greek Macintosh" },
177 { 0xC8, 1250, "Eastern European Windows" },
178 { 0xC9, 1251, "Russian Windows" },
179 { 0xCA, 1254, "Turkish Windows" },
180 { 0xCB, 1253, "Greek Windows" },
181 { 0xCC, 1257, "Baltic Windows" },
182 { 0x00, 0, NULL }
183 };
184 int i;
185 guint8 hdr[32];
186
187 if (gsf_input_read (x->input, 32, hdr) == NULL) {
188 *ret_error = go_error_info_new_str (_("Failed to read DBF header."));
189 return;
190 }
191
192 switch (hdr[0]) { /* FIXME: assuming dBASE III+, not IV */
193 case 0x02:
194 #if XBASE_DEBUG > 0
195 g_printerr ("FoxBASE\n");
196 #endif
197 break;
198 case 0x03:
199 #if XBASE_DEBUG > 0
200 g_printerr ("FoxBASE+/dBASE III PLUS, no memo\n");
201 #endif
202 break;
203 case 0x30:
204 #if XBASE_DEBUG > 0
205 g_printerr ("Visual FoxPro\n");
206 #endif
207 break;
208 case 0x43:
209 #if XBASE_DEBUG > 0
210 g_printerr ("dBASE IV SQL table files, no memo\n");
211 #endif
212 break;
213 case 0x63:
214 #if XBASE_DEBUG > 0
215 g_printerr ("dBASE IV SQL system files, no memo\n");
216 #endif
217 break;
218 case 0x83:
219 #if XBASE_DEBUG > 0
220 g_printerr ("FoxBASE+/dBASE III PLUS, with memo\n");
221 #endif
222 break;
223 case 0x8B:
224 #if XBASE_DEBUG > 0
225 g_printerr ("dBASE IV with memo\n");
226 #endif
227 break;
228 case 0xCB:
229 #if XBASE_DEBUG > 0
230 g_printerr ("dBASE IV SQL table files, with memo\n");
231 #endif
232 break;
233 case 0xF5:
234 #if XBASE_DEBUG > 0
235 g_printerr ("FoxPro 2.x (or earlier) with memo\n");
236 #endif
237 break;
238 case 0xFB:
239 #if XBASE_DEBUG > 0
240 g_printerr ("FoxBASE\n");
241 #endif
242 break;
243 default:
244 g_printerr ("unknown 0x%hhx\n", hdr[0]);
245 }
246
247 x->records = GSF_LE_GET_GUINT32 (hdr + 4);
248 x->headerlen = GSF_LE_GET_GUINT16 (hdr + 8);
249 x->fieldlen = GSF_LE_GET_GUINT16 (hdr + 10);
250 #if XBASE_DEBUG > 0
251 g_printerr ("Last update (YY/MM/DD):\t%2hhd/%2hhd/%2hhd\n",hdr[1],hdr[2],hdr[3]); /* Y2K ?!? */
252 g_printerr ("Records:\t%u\n", x->records);
253 g_printerr ("Header length:\t%u\n", x->headerlen);
254 g_printerr ("Record length:\t%u\n", x->fieldlen);
255 g_printerr ("Reserved:\t%d\n", GSF_LE_GET_GUINT16 (hdr + 12));
256 g_printerr ("Incomplete transaction:\t%hhd\n", hdr[14]);
257 g_printerr ("Encryption flag:\t%d\n", hdr[15]);
258 g_printerr ("Free record thread:\t%u\n", GSF_LE_GET_GUINT32 (hdr + 16));
259 g_printerr ("Reserved (multi-user):\t%" G_GINT64_FORMAT "\n",
260 GSF_LE_GET_GUINT64(hdr + 20));
261 g_printerr ("MDX flag:\t%d\n", hdr[28]); /* FIXME: decode */
262 g_printerr ("Reserved:\t%d\n", GSF_LE_GET_GUINT16 (hdr + 30));
263 g_printerr ("Language driver (code page):\t");
264 #endif
265 x->char_map = (GIConv)-1;
266 for (i = 0; codepages[i].id != 0 ; i++)
267 if (codepages[i].id == hdr[29]) {
268 x->char_map = gsf_msole_iconv_open_for_import (codepages[i].codepage);
269 d (1, g_printerr ("%s (%d)\n",
270 codepages[i].name, codepages[i].codepage););
271 break;
272 }
273 if (x->char_map == (GIConv)-1) {
274 #if XBASE_DEBUG > 0
275 g_printerr ("unknown 0x%x\n!\n", hdr[29]);
276 #endif
277 g_warning ("File has unknown or missing code page information (%x)",
278 hdr[29]);
279 /* Got any better idea? */
280 x->char_map = g_iconv_open ("UTF-8", "ISO-8859-1");
281 }
282 }
283
284 static XBfield *
xbase_field_new(XBfile * file)285 xbase_field_new (XBfile *file)
286 {
287 XBfield *field;
288 guint8 buf[33];
289 char *p;
290 if (gsf_input_read (file->input, 2, buf) == NULL) { /* 1 byte out ? */
291 g_warning ("xbase_field_new: fread error");
292 return NULL;
293 } else if (buf[0] == 0x0D || buf[0] == 0) { /* field array terminator */
294 file->offset = gsf_input_tell (file->input);
295 if (buf[0] == 0x00 && buf[1] == 0x0D) { /* FIXME: crude test, not in spec */
296 if (gsf_input_seek (file->input, 263, G_SEEK_CUR)) /* skip DBC */
297 g_warning ("xbase_field_new: fseek error");
298 }
299 return NULL;
300 } else if (gsf_input_read (file->input, 30, buf+2) == NULL) {
301 g_warning ("Field descriptor short");
302 return NULL;
303 }
304 #if XBASE_DEBUG > 0
305 buf[32] = 0;
306 g_printerr ("Field:\t'%s'\n", buf);
307 #endif
308
309 field = g_new (XBfield, 1);
310 field->len = buf[16];
311
312 strncpy(field->name, buf, 10);
313 field->name[10] = '\0';
314 if ((p = strchr (field_types, field->type = buf[11])) == NULL)
315 g_warning ("Unrecognised field type '%c'", field->type);
316 #if XBASE_DEBUG > 0
317 else
318 g_printerr ("Type:\t%c (%s)\n", field->type,
319 field_type_descriptions [p-field_types]);
320 g_printerr ("Data address:\t0x%.8X\n", GSF_LE_GET_GUINT32 (buf + 12));
321 g_printerr ("Length:\t%d\n", field->len);
322 g_printerr ("Decimal count:\t%d\n", buf[17]);
323 #endif
324 if (file->fields) {
325 XBfield *tmp = file->format[file->fields-1];
326 field->pos = tmp->pos + tmp->len;
327 } else
328 field->pos = 0;
329
330 field->fmt = (field->type == 'D')
331 ? go_format_ref (go_format_default_date ())
332 : NULL;
333
334 return field; /* FIXME: use more of buf if needed ? */
335 }
336
337 XBfile *
xbase_open(GsfInput * input,GOErrorInfo ** ret_error)338 xbase_open (GsfInput *input, GOErrorInfo **ret_error)
339 {
340 XBfile *ans;
341 guint allocated = GNM_DEFAULT_COLS;
342
343 *ret_error = NULL;
344
345 ans = g_new (XBfile, 1);
346 ans->input = input;
347
348 xbase_read_header (ans, ret_error);
349 if (*ret_error) {
350 g_free (ans);
351 return NULL;
352 }
353
354 ans->fields = 0;
355 ans->format = g_new (XBfield *, allocated);
356 while (ans->fields < GNM_MAX_COLS) {
357 XBfield *field = xbase_field_new (ans);
358 if (!field)
359 break;
360 if (ans->fields == allocated) {
361 allocated *= 2;
362 ans->format = g_renew (XBfield *, ans->format, allocated);
363 }
364 ans->format[ans->fields++] = field;
365 }
366
367 return ans;
368 }
369
370 void
xbase_close(XBfile * x)371 xbase_close (XBfile *x)
372 {
373 unsigned i;
374
375 for (i = 0; i < x->fields; i++) {
376 XBfield *field = x->format[i];
377 go_format_unref (field->fmt);
378 g_free (field);
379 }
380 gsf_iconv_close (x->char_map);
381 g_free (x->format);
382 g_free (x);
383 }
384