1 #include <gnumeric-config.h>
2 #include <glib/gi18n-lib.h>
3 #include <gnumeric.h>
4 #include <libgnumeric.h>
5 #include "xbase.h"
6 
7 #include <gnm-format.h>
8 #include <gutils.h>
9 #include <sheet.h>
10 #include <goffice/goffice.h>
11 
12 #include <string.h>
13 #include <gsf/gsf-input.h>
14 #include <gsf/gsf-utils.h>
15 #include <gsf/gsf-msole-utils.h>
16 
/*
 * Diagnostics switch.  With XBASE_DEBUG at 0 the d() macro expands to
 * nothing; with a positive value, d (level, code) runs `code` only when
 * XBASE_DEBUG is greater than `level`.
 */
#define XBASE_DEBUG 0
#if XBASE_DEBUG > 0
#define d(level, code)	do { if (XBASE_DEBUG > level) { code } } while (0)
#else
#define d(level, code)
#endif

/* One letter per field type code accepted in a field descriptor. */
static char const * const field_types = "CNLDMF?BGPYTI";

#if XBASE_DEBUG > 0
/* Human readable names, in the same order as the letters in field_types. */
static char const * const field_type_descriptions [] = {
	"Character",			/* C */
	"Number",			/* N */
	"Logical",			/* L */
	"Date",				/* D */
	"Memo",				/* M */
	"Floating point",		/* F */
	"Character name variable",	/* ? */
	"Binary",			/* B */
	"General",			/* G */
	"Picture",			/* P */
	"Currency",			/* Y */
	"DateTime",			/* T */
	"Integer"			/* I */
};
#endif
33 
34 
35 /**
36  * Newly allocated pointer to record, initialised as first in database.
37 */
38 XBrecord *
record_new(XBfile * file)39 record_new (XBfile *file)
40 {
41 	XBrecord *ans = g_new (XBrecord, 1);
42 	ans->file = file;
43 	ans->row = 1;
44 	/* ans->data = g_new (guint8, file->fieldlen); */
45 	ans->data = (guint8 *) g_strnfill (file->fieldlen, '?'); /* FIXME : just for testing */
46 	record_seek (ans, SEEK_SET, 1);
47 	return ans;
48 }
49 
50 /**
51  * Position record at requested row, and load raw data.  Returns FALSE on
52  * invalid row, file error, or invalid whence (same values as in fseek).
53  */
54 gboolean
record_seek(XBrecord * record,int whence,gsf_off_t row)55 record_seek (XBrecord *record, int whence, gsf_off_t row)
56 {
57 	gsf_off_t offset;
58 	switch (whence) {
59 	case SEEK_SET:
60 		offset = row;
61 		break;
62 	case SEEK_CUR:
63 		offset = record->row + row;
64 		break;
65 	case SEEK_END:
66 		offset = record->file->records + 1 - row;
67 		break;
68 	default:
69 		g_warning("record_seek: invalid whence (%d)", whence);
70 		return FALSE;
71 	}
72 	if (offset < 1 || offset > (gsf_off_t)record->file->records)
73 		return FALSE;
74 	record->row = offset;
75 	offset = (offset-1) * record->file->fieldlen + record->file->headerlen;
76 	return !gsf_input_seek (record->file->input, offset, G_SEEK_SET) &&
77 	    gsf_input_read (record->file->input, record->file->fieldlen, record->data) != NULL;
78 }
79 
80 /**
81  * Clear allocated space for record.
82  */
83 void
record_free(XBrecord * record)84 record_free (XBrecord *record)
85 {
86 	g_free (record->data);
87 	g_free (record);
88 }
89 
90 /**
91  * Points to binary data for num'th field in record's data.
92  */
93 gchar *
record_get_field(XBrecord const * record,guint num)94 record_get_field (XBrecord const *record, guint num)
95 {
96 	if (num >= record->file->fields)
97 		return NULL;
98 	return (gchar *)record->data + record->file->format[num]->pos + 1;
99 }
100 
101 gboolean
record_deleted(XBrecord * record)102 record_deleted (XBrecord *record)
103 {
104 	return record->data[0] == 0x2a;
105 }
106 
107 static void
xbase_read_header(XBfile * x,GOErrorInfo ** ret_error)108 xbase_read_header (XBfile *x, GOErrorInfo **ret_error)
109 {
110 	static struct {
111 		guint8 const id;
112 		int    const codepage;
113 		char const *const name;
114 	} const codepages [] = {
115 		{ 0x01, 437, "U.S. MS-DOS" },
116 		{ 0x02, 850, "International MS-DOS" },
117 		{ 0x03, 1252, "Windows ANSI" },
118 		{ 0x04, 10000, "Standard Macintosh" },
119 		{ 0x08, 865, "Danish OEM" },
120 		{ 0x09, 437, "Dutch OEM" },
121 		{ 0x0A, 850, "Dutch OEM*" },
122 		{ 0x0B, 437, "Finnish OEM" },
123 		{ 0x0D, 437, "French OEM" },
124 		{ 0x0E, 850, "French OEM*" },
125 		{ 0x0F, 437, "German OEM" },
126 		{ 0x10, 850, "German OEM*" },
127 		{ 0x11, 437, "Italian OEM" },
128 		{ 0x12, 850, "Italian OEM*" },
129 		{ 0x13, 932, "Japanese Shift-JIS" },
130 		{ 0x14, 850, "Spanish OEM*" },
131 		{ 0x15, 437, "Swedish OEM" },
132 		{ 0x16, 850, "Swedish OEM*" },
133 		{ 0x17, 865, "Norwegian OEM" },
134 		{ 0x18, 437, "Spanish OEM" },
135 		{ 0x19, 437, "English OEM (Britain)" },
136 		{ 0x1A, 850, "English OEM (Britain)*" },
137 		{ 0x1B, 437, "English OEM (U.S.)" },
138 		{ 0x1C, 863, "French OEM (Canada)" },
139 		{ 0x1D, 850, "French OEM*" },
140 		{ 0x1F, 852, "Czech OEM" },
141 		{ 0x22, 852, "Hungarian OEM" },
142 		{ 0x23, 852, "Polish OEM" },
143 		{ 0x24, 860, "Portugese OEM" },
144 		{ 0x25, 850, "Potugese OEM*" },
145 		{ 0x26, 866, "Russian OEM" },
146 		{ 0x37, 850, "English OEM (U.S.)*" },
147 		{ 0x40, 852, "Romanian OEM" },
148 		{ 0x4D, 936, "Chinese GBK (PRC)" },
149 		{ 0x4E, 949, "Korean (ANSI/OEM)" },
150 		{ 0x4F, 950, "Chinese Big 5 (Taiwan)" },
151 		{ 0x50, 874, "Thai (ANSI/OEM)" },
152 		{ 0x57, 1252, "Windows ANSI" }, /* guess */
153 		{ 0x58, 1252, "Western European ANSI" },
154 		{ 0x59, 1252, "Spanish ANSI" },
155 		{ 0x64, 852, "Eastern European MS-DOS" },
156 		{ 0x65, 866, "Russian MS-DOS" },
157 		{ 0x66, 865, "Nordic MS-DOS" },
158 		{ 0x67, 861, "Icelandic MS-DOS" },
159 		{ 0x68, 895, "Kamenicky (Czech) MS-DOS" },
160 		{ 0x69, 620, "Mazovia (Polish) MS-DOS" },
161 		{ 0x6A, 737, "Greek MS-DOS (437G)" },
162 		{ 0x6B, 857, "Turkish MS-DOS" },
163 		{ 0x6C, 863, "French-Canadian MS-DOS" },
164 		{ 0x78, 950, "Chinese (Hong Kong SAR, Taiwan) Windows" },
165 		{ 0x79, 949, "Korean Windows" },
166 		{ 0x7A, 936, "Chinese (PRC, Singapore) Windows" },
167 		{ 0x7B, 932, "Japanese Windows" },
168 		{ 0x7C, 874, "Thai Windows" },
169 		{ 0x7D, 1255, "Hebrew Windows" },
170 		{ 0x7E, 1256, "Arabic Windows" },
171 		{ 0x86, 737, "Greek OEM" },
172 		{ 0x87, 852, "Slovenian OEM" },
173 		{ 0x88, 857, "Turkish OEM" },
174 		{ 0x96, 10007, "Russian Macintosh" },
175 		{ 0x97, 10029, "Macintosh EE" },
176 		{ 0x98, 10006, "Greek Macintosh" },
177 		{ 0xC8, 1250, "Eastern European Windows" },
178 		{ 0xC9, 1251, "Russian Windows" },
179 		{ 0xCA, 1254, "Turkish Windows" },
180 		{ 0xCB, 1253, "Greek Windows" },
181 		{ 0xCC, 1257, "Baltic Windows" },
182 		{ 0x00, 0, NULL }
183 	};
184 	int i;
185 	guint8 hdr[32];
186 
187 	if (gsf_input_read (x->input, 32, hdr) == NULL) {
188 		*ret_error = go_error_info_new_str (_("Failed to read DBF header."));
189 		return;
190 	}
191 
192 	switch (hdr[0]) { /* FIXME: assuming dBASE III+, not IV */
193 	case 0x02:
194 #if XBASE_DEBUG > 0
195 		g_printerr ("FoxBASE\n");
196 #endif
197 		break;
198 	case 0x03:
199 #if XBASE_DEBUG > 0
200 		g_printerr ("FoxBASE+/dBASE III PLUS, no memo\n");
201 #endif
202 		break;
203 	case 0x30:
204 #if XBASE_DEBUG > 0
205 		g_printerr ("Visual FoxPro\n");
206 #endif
207 		break;
208 	case 0x43:
209 #if XBASE_DEBUG > 0
210 		g_printerr ("dBASE IV SQL table files, no memo\n");
211 #endif
212 		break;
213 	case 0x63:
214 #if XBASE_DEBUG > 0
215 		g_printerr ("dBASE IV SQL system files, no memo\n");
216 #endif
217 		break;
218 	case 0x83:
219 #if XBASE_DEBUG > 0
220 		g_printerr ("FoxBASE+/dBASE III PLUS, with memo\n");
221 #endif
222 		break;
223 	case 0x8B:
224 #if XBASE_DEBUG > 0
225 		g_printerr ("dBASE IV with memo\n");
226 #endif
227 		break;
228 	case 0xCB:
229 #if XBASE_DEBUG > 0
230 		g_printerr ("dBASE IV SQL table files, with memo\n");
231 #endif
232 		break;
233 	case 0xF5:
234 #if XBASE_DEBUG > 0
235 		g_printerr ("FoxPro 2.x (or earlier) with memo\n");
236 #endif
237 		break;
238 	case 0xFB:
239 #if XBASE_DEBUG > 0
240 		g_printerr ("FoxBASE\n");
241 #endif
242 		break;
243 	default:
244 		g_printerr ("unknown 0x%hhx\n", hdr[0]);
245 	}
246 
247 	x->records     = GSF_LE_GET_GUINT32 (hdr + 4);
248 	x->headerlen   = GSF_LE_GET_GUINT16 (hdr + 8);
249 	x->fieldlen    = GSF_LE_GET_GUINT16 (hdr + 10);
250 #if XBASE_DEBUG > 0
251 	g_printerr ("Last update (YY/MM/DD):\t%2hhd/%2hhd/%2hhd\n",hdr[1],hdr[2],hdr[3]); /* Y2K ?!? */
252 	g_printerr ("Records:\t%u\n", x->records);
253 	g_printerr ("Header length:\t%u\n", x->headerlen);
254 	g_printerr ("Record length:\t%u\n", x->fieldlen);
255 	g_printerr ("Reserved:\t%d\n", GSF_LE_GET_GUINT16 (hdr + 12));
256 	g_printerr ("Incomplete transaction:\t%hhd\n", hdr[14]);
257 	g_printerr ("Encryption flag:\t%d\n", hdr[15]);
258 	g_printerr ("Free record thread:\t%u\n", GSF_LE_GET_GUINT32 (hdr + 16));
259 	g_printerr ("Reserved (multi-user):\t%" G_GINT64_FORMAT "\n",
260 		 GSF_LE_GET_GUINT64(hdr + 20));
261 	g_printerr ("MDX flag:\t%d\n", hdr[28]); /* FIXME: decode */
262 	g_printerr ("Reserved:\t%d\n", GSF_LE_GET_GUINT16 (hdr + 30));
263 	g_printerr ("Language driver (code page):\t");
264 #endif
265 	x->char_map = (GIConv)-1;
266 	for (i = 0; codepages[i].id != 0 ; i++)
267 		if (codepages[i].id == hdr[29]) {
268 			x->char_map = gsf_msole_iconv_open_for_import (codepages[i].codepage);
269 			d (1, g_printerr ("%s (%d)\n",
270 				       codepages[i].name, codepages[i].codepage););
271 			break;
272 		}
273 	if (x->char_map == (GIConv)-1) {
274 #if XBASE_DEBUG > 0
275 		g_printerr ("unknown 0x%x\n!\n", hdr[29]);
276 #endif
277 		g_warning ("File has unknown or missing code page information (%x)",
278 			   hdr[29]);
279 		/* Got any better idea?  */
280 		x->char_map = g_iconv_open ("UTF-8", "ISO-8859-1");
281 	}
282 }
283 
284 static XBfield *
xbase_field_new(XBfile * file)285 xbase_field_new (XBfile *file)
286 {
287 	XBfield *field;
288 	guint8   buf[33];
289 	char *p;
290 	if (gsf_input_read (file->input, 2, buf) == NULL) { /* 1 byte out ? */
291 		g_warning ("xbase_field_new: fread error");
292 		return NULL;
293 	} else if (buf[0] == 0x0D || buf[0] == 0) { /* field array terminator */
294 		file->offset = gsf_input_tell (file->input);
295 		if (buf[0] == 0x00 && buf[1] == 0x0D) { /* FIXME: crude test, not in spec */
296 			if (gsf_input_seek (file->input, 263, G_SEEK_CUR)) /* skip DBC */
297 				g_warning ("xbase_field_new: fseek error");
298 		}
299 		return NULL;
300 	} else if (gsf_input_read (file->input, 30, buf+2) == NULL) {
301 		g_warning ("Field descriptor short");
302 		return NULL;
303 	}
304 #if XBASE_DEBUG > 0
305 	buf[32] = 0;
306 	g_printerr ("Field:\t'%s'\n", buf);
307 #endif
308 
309 	field = g_new (XBfield, 1);
310 	field->len = buf[16];
311 
312 	strncpy(field->name, buf, 10);
313 	field->name[10] = '\0';
314 	if ((p = strchr (field_types, field->type = buf[11])) == NULL)
315 		g_warning ("Unrecognised field type '%c'", field->type);
316 #if XBASE_DEBUG > 0
317 	else
318 		g_printerr ("Type:\t%c (%s)\n", field->type,
319 			field_type_descriptions [p-field_types]);
320 	g_printerr ("Data address:\t0x%.8X\n", GSF_LE_GET_GUINT32 (buf + 12));
321 	g_printerr ("Length:\t%d\n", field->len);
322 	g_printerr ("Decimal count:\t%d\n", buf[17]);
323 #endif
324 	if (file->fields) {
325 		XBfield *tmp = file->format[file->fields-1];
326 		field->pos = tmp->pos + tmp->len;
327 	} else
328 		field->pos = 0;
329 
330 	field->fmt = (field->type == 'D')
331 		? go_format_ref (go_format_default_date ())
332 		: NULL;
333 
334 	return field; /* FIXME: use more of buf if needed ? */
335 }
336 
337 XBfile *
xbase_open(GsfInput * input,GOErrorInfo ** ret_error)338 xbase_open (GsfInput *input, GOErrorInfo **ret_error)
339 {
340 	XBfile *ans;
341 	guint allocated = GNM_DEFAULT_COLS;
342 
343 	*ret_error = NULL;
344 
345 	ans = g_new (XBfile, 1);
346 	ans->input = input;
347 
348 	xbase_read_header (ans, ret_error);
349 	if (*ret_error) {
350 		g_free (ans);
351 		return NULL;
352 	}
353 
354 	ans->fields = 0;
355 	ans->format = g_new (XBfield *, allocated);
356 	while (ans->fields < GNM_MAX_COLS) {
357 		XBfield *field = xbase_field_new (ans);
358 		if (!field)
359 			break;
360 		if (ans->fields == allocated) {
361 			allocated *= 2;
362 			ans->format = g_renew (XBfield *, ans->format, allocated);
363 		}
364 		ans->format[ans->fields++] = field;
365 	}
366 
367 	return ans;
368 }
369 
370 void
xbase_close(XBfile * x)371 xbase_close (XBfile *x)
372 {
373 	unsigned i;
374 
375 	for (i = 0; i < x->fields; i++) {
376 		XBfield *field = x->format[i];
377 		go_format_unref (field->fmt);
378 		g_free (field);
379 	}
380 	gsf_iconv_close (x->char_map);
381 	g_free (x->format);
382 	g_free (x);
383 }
384