1 /**********************************************************************
2 * $Id: e00read.c,v 1.10 2009-02-24 20:03:50 aboudreault Exp $
3 *
4 * Name: e00read.c
5 * Project: Compressed E00 Read/Write library
6 * Language: ANSI C
7 * Purpose: Functions to read Compressed E00 files and return a stream
8 * of uncompressed lines.
9 * Author: Daniel Morissette, dmorissette@mapgears.com
10 *
11 * $Log: e00read.c,v $
12 * Revision 1.10 2009-02-24 20:03:50 aboudreault
13 * Added a short manual pages (#1875)
14 * Updated documentation and code examples (#247)
15 *
16 * Revision 1.9 2005-09-17 14:22:05 daniel
17 * Switch to MIT license, update refs to website and email address, and
18 * prepare for 1.0.0 release.
19 *
20 * Revision 1.8 1999/02/25 18:45:56 daniel
21 * Now use CPL for Error handling, Memory allocation, and File access
22 *
23 * Revision 1.7 1999/01/08 17:39:08 daniel
24 * Added E00ReadCallbackOpen()
25 *
26 * Revision 1.6 1998/11/13 16:34:08 daniel
27 * Fixed '\r' problem when reading E00 files from a PC under Unix
28 *
29 * Revision 1.5 1998/11/13 15:48:08 daniel
30 * Simplified the decoding of the compression codes for numbers
31 * (use a logical rule instead of going case by case)
32 *
33 * Revision 1.4 1998/11/02 18:34:29 daniel
34 * Added E00ErrorReset() calls. Replace "EXP 1" by "EXP 0" on read.
35 *
36 * Revision 1.1 1998/10/29 13:26:00 daniel
37 * Initial revision
38 *
39 **********************************************************************
40 * Copyright (c) 1998-2005, Daniel Morissette
41 *
42 * Permission is hereby granted, free of charge, to any person obtaining a
43 * copy of this software and associated documentation files (the "Software"),
44 * to deal in the Software without restriction, including without limitation
45 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
46 * and/or sell copies of the Software, and to permit persons to whom the
47 * Software is furnished to do so, subject to the following conditions:
48 *
49 * The above copyright notice and this permission notice shall be included
50 * in all copies or substantial portions of the Software.
51 *
52 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
53 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
54 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
55 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
56 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
57 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
58 * DEALINGS IN THE SOFTWARE.
59 *
60 **********************************************************************/
61
62 #include <stdlib.h>
63 #include <string.h>
64 #include <ctype.h>
65 #include <errno.h>
66
67 #include "e00compr.h"
68
69 static void _ReadNextSourceLine(E00ReadPtr psInfo);
70 static const char *_UncompressNextLine(E00ReadPtr psInfo);
71
72 /**********************************************************************
73 * _E00ReadTestOpen()
74 *
75 * Given a pre-initialized E00ReadPtr, this function will make sure
76 * that the file is really a E00 file, and also establish if it is
77 * compressed or not... setting the structure members by the same way.
78 *
79 * Returns NULL (and destroys the E00ReadPtr) if the file does not
80 * appear to be a valid E00 file.
81 **********************************************************************/
/* Validate a freshly initialized read handle and detect compression.
 *
 * psInfo: handle whose input source (fp, or the read/rewind callbacks)
 *         has already been set by the caller.
 *
 * Returns psInfo, rewound with E00ReadRewind() and with bIsCompressed set
 * to 1 when the file looks compressed, or NULL when the first line does
 * not start with "EXP ".  On failure only the structure itself is released
 * with CPLFree(); psInfo->fp is not closed by this function.
 */
static E00ReadPtr _E00ReadTestOpen(E00ReadPtr psInfo)
{
    size_t nLen;

    /* The very first line of an E00 file must start with "EXP ".
     */
    _ReadNextSourceLine(psInfo);
    if (psInfo->bEOF || strncmp(psInfo->szInBuf, "EXP ", 4) != 0)
    {
        CPLFree(psInfo);
        return NULL;
    }

    /* We cannot rely on the number that follows "EXP" to tell whether the
     * file is compressed: some uncompressed files start with "EXP 1".
     * Instead, skip every line that is empty or starts with a whitespace
     * character and look at the first remaining line: in a compressed
     * file it is 79 or 80 characters long and contains '~'.
     *
     * The cast to unsigned char is required because passing a negative
     * char value to isspace() is undefined behavior.
     */
    do
    {
        _ReadNextSourceLine(psInfo);
    } while (!psInfo->bEOF &&
             (psInfo->szInBuf[0] == '\0' ||
              isspace((unsigned char)psInfo->szInBuf[0])));

    if (!psInfo->bEOF)
    {
        nLen = strlen(psInfo->szInBuf);
        if ((nLen == 79 || nLen == 80) &&
            strchr(psInfo->szInBuf, '~') != NULL)
        {
            psInfo->bIsCompressed = 1;
        }
    }

    /* Leave the handle positioned at the beginning of the file.
     */
    E00ReadRewind(psInfo);

    return psInfo;
}
124
125 /**********************************************************************
126 * E00ReadOpen()
127 *
128 * Try to open a E00 file given its filename and return a E00ReadPtr handle.
129 *
130 * Returns NULL if the file could not be opened or if it does not
131 * appear to be a valid E00 file.
132 **********************************************************************/
E00ReadOpen(const char * pszFname)133 E00ReadPtr E00ReadOpen(const char *pszFname)
134 {
135 E00ReadPtr psInfo = NULL;
136 FILE *fp;
137
138 CPLErrorReset();
139
140 /* Open the file
141 */
142 fp = VSIFOpen(pszFname, "rt");
143 if (fp == NULL)
144 {
145 CPLError(CE_Failure, CPLE_OpenFailed,
146 "Failed to open %s: %s", pszFname, strerror(errno));
147 return NULL;
148 }
149
150 /* File was succesfully opened, allocate and initialize a
151 * E00ReadPtr handle and check that the file is valid.
152 */
153 psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
154
155 psInfo->fp = fp;
156
157 psInfo = _E00ReadTestOpen(psInfo);
158
159 if (psInfo == NULL)
160 {
161 CPLError(CE_Failure, CPLE_OpenFailed,
162 "%s is not a valid E00 file.", pszFname);
163 }
164
165 return psInfo;
166 }
167
168 /**********************************************************************
169 * E00ReadCallbackOpen()
170 *
171 * This is an alternative to E00ReadOpen() for cases where you want to
172 * do all the file management yourself. You open/close the file yourself
173 * and provide 2 callback functions: to read from the file and rewind the
174 * file pointer. pRefData is your handle on the physical file and can
175 * be whatever you want... it is not used by the library, it will be
176 * passed directly to your 2 callback functions when they are called.
177 *
178 * The callback functions must have the following C prototype:
179 *
180 * const char *myReadNextLine(void *pRefData);
181 * void myReadRewind(void *pRefData);
182 *
183 * myReadNextLine() should return a reference to its own internal
184 * buffer, or NULL if an error happens or EOF is reached.
185 *
186 * E00ReadCallbackOpen() returns a E00ReadPtr handle or NULL if the file
187 * does not appear to be a valid E00 file.
188 **********************************************************************/
E00ReadCallbackOpen(void * pRefData,const char * (* pfnReadNextLine)(void *),void (* pfnReadRewind)(void *))189 E00ReadPtr E00ReadCallbackOpen(void *pRefData,
190 const char * (*pfnReadNextLine)(void *),
191 void (*pfnReadRewind)(void *))
192 {
193 E00ReadPtr psInfo = NULL;
194
195 CPLErrorReset();
196
197 /* Make sure we received valid function pointers
198 */
199 if (pfnReadNextLine == NULL || pfnReadRewind == NULL)
200 {
201 CPLError(CE_Failure, CPLE_IllegalArg,
202 "Invalid function pointers!");
203 return NULL;
204 }
205
206 /* Allocate and initialize a
207 * E00ReadPtr handle and check that the file is valid.
208 */
209 psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
210
211 psInfo->pRefData = pRefData;
212 psInfo->pfnReadNextLine = pfnReadNextLine;
213 psInfo->pfnReadRewind = pfnReadRewind;
214
215 psInfo = _E00ReadTestOpen(psInfo);
216
217 if (psInfo == NULL)
218 {
219 CPLError(CE_Failure, CPLE_OpenFailed,
220 "This is not a valid E00 file.");
221 }
222
223 return psInfo;
224 }
225
226 /**********************************************************************
227 * E00ReadClose()
228 *
229 * Close input file and release any memory used by the E00ReadPtr.
230 **********************************************************************/
/* Release a read handle.
 *
 * Resets the CPL error state, then, for a non-NULL handle, closes
 * psInfo->fp when it is set and frees the handle with CPLFree().
 * A NULL psInfo is accepted and only resets the error state.
 */
void E00ReadClose(E00ReadPtr psInfo)
{
    CPLErrorReset();

    if (psInfo == NULL)
        return;

    if (psInfo->fp != NULL)
        VSIFClose(psInfo->fp);

    CPLFree(psInfo);
}
242
243 /**********************************************************************
244 * E00ReadRewind()
245 *
246 * Rewind the E00ReadPtr so that another read pass can be started on the
247 * input file.
248 **********************************************************************/
/* Reset a read handle to the beginning of its input.
 *
 * Clears both line buffers, the input buffer index and the input line
 * counter, rewinds the source (through the rewind callback when one is
 * installed, otherwise with VSIRewind() on psInfo->fp) and finally clears
 * the EOF flag.  psInfo must not be NULL.
 */
void E00ReadRewind(E00ReadPtr psInfo)
{
    CPLErrorReset();

    /* Forget everything about the current read position.
     */
    psInfo->nInputLineNo = 0;
    psInfo->iInBufPtr = 0;
    psInfo->szOutBuf[0] = '\0';
    psInfo->szInBuf[0] = '\0';

    /* Reposition the underlying source.
     */
    if (psInfo->pfnReadRewind != NULL)
        psInfo->pfnReadRewind(psInfo->pRefData);
    else
        VSIRewind(psInfo->fp);

    psInfo->bEOF = 0;
}
265
266 /**********************************************************************
267 * E00ReadNextLine()
268 *
269 * Return the next line of input from the E00 file or NULL if we reached EOF.
270 *
271 * Returns a reference to an internal buffer whose contents will be valid
272 * only until the next call to this function.
273 **********************************************************************/
/* Fetch the next (uncompressed) line of the E00 file.
 *
 * Returns a pointer to one of the handle's internal buffers, or NULL when
 * psInfo is NULL, when EOF had already been reached, or when EOF was hit
 * during this call and the resulting line is empty.
 */
const char *E00ReadNextLine(E00ReadPtr psInfo)
{
    const char *pszResult;

    CPLErrorReset();

    if (psInfo == NULL || psInfo->bEOF)
        return NULL;

    if (!psInfo->bIsCompressed)
    {
        /* Plain file: the source line is the result.
         */
        _ReadNextSourceLine(psInfo);
        pszResult = psInfo->szInBuf;
    }
    else if (psInfo->nInputLineNo == 0)
    {
        /* Header line of a compressed file: hand it back with the first
         * " 1" turned into " 0", i.e. "EXP 1" becomes "EXP 0".  The open
         * routine has already verified that the line starts with "EXP ".
         */
        char *pszFlag;

        _ReadNextSourceLine(psInfo);
        pszFlag = strstr(psInfo->szInBuf, " 1");
        if (pszFlag != NULL)
            pszFlag[1] = '0';
        pszResult = psInfo->szInBuf;
    }
    else
    {
        /* Compressed data.  Right after the header line the input buffer
         * still holds that header, so load a fresh source line first.
         */
        if (psInfo->nInputLineNo == 1)
            _ReadNextSourceLine(psInfo);

        pszResult = _UncompressNextLine(psInfo);
    }

    /* If we just reached EOF, do not report an extra empty line at the
     * end of the uncompressed output.
     */
    if (psInfo->bEOF && pszResult[0] == '\0')
        return NULL;

    return pszResult;
}
324
325 /**********************************************************************
326 * _ReadNextSourceLine()
327 *
328 * Loads the next line from the source file in psInfo.
329 *
330 * psInfo->bEOF should be checked after this call.
331 **********************************************************************/
/* Load the next raw source line into psInfo->szInBuf.
 *
 * Does nothing when psInfo->bEOF is already set.  Otherwise the input
 * buffer is emptied, the line is read with VSIFGets() or through
 * psInfo->pfnReadNextLine(), trailing '\n' and '\r' characters are
 * stripped, and nInputLineNo is incremented.  When the source returns no
 * line, psInfo->bEOF is set to 1 and the buffer is left empty.
 *
 * Callers should check psInfo->bEOF after this call.
 */
static void _ReadNextSourceLine(E00ReadPtr psInfo)
{
    size_t nLen;

    if (psInfo->bEOF)
        return;

    psInfo->iInBufPtr = 0;
    psInfo->szInBuf[0] = '\0';

    /* Read either using VSIFGets() or psInfo->pfnReadNextLine()
     * depending on the way the file was opened...
     */
    if (psInfo->pfnReadNextLine == NULL)
    {
        if (VSIFGets(psInfo->szInBuf, E00_READ_BUF_SIZE, psInfo->fp) == NULL)
            psInfo->bEOF = 1;
    }
    else
    {
        const char *pszLine = psInfo->pfnReadNextLine(psInfo->pRefData);

        if (pszLine != NULL)
        {
            /* strncpy() does not write a terminator when the source is
             * at least as long as the count, so copy one byte less than
             * the buffer size and terminate explicitly.  Over-long
             * callback lines are truncated instead of leaving szInBuf
             * unterminated.
             */
            strncpy(psInfo->szInBuf, pszLine, E00_READ_BUF_SIZE - 1);
            psInfo->szInBuf[E00_READ_BUF_SIZE - 1] = '\0';
        }
        else
        {
            psInfo->bEOF = 1;
        }
    }

    if (psInfo->bEOF)
        return;

    /* A new line was successfully read. Remove trailing '\n' if any.
     * (Note: For Unix systems, we also have to check for '\r')
     */
    nLen = strlen(psInfo->szInBuf);
    while (nLen > 0 && (psInfo->szInBuf[nLen - 1] == '\n' ||
                        psInfo->szInBuf[nLen - 1] == '\r'))
    {
        psInfo->szInBuf[--nLen] = '\0';
    }

    psInfo->nInputLineNo++;
}
385
386
387 /**********************************************************************
388 * _GetNextSourceChar()
389 *
390 * Returns the next char from the source file input buffer... and
391 * reload the input buffer when necessary... this function makes the
392 * whole input file appear as one huge null-terminated string with
393 * no line delimiters.
394 *
395 * Will return '\0' when EOF is reached.
396 **********************************************************************/
/* Return the next character of the source, reloading the input buffer
 * from the next source line whenever the current one is exhausted, so
 * that the input appears as one continuous string with no line
 * delimiters.
 *
 * Returns '\0' once psInfo->bEOF is set.
 *
 * Implemented as a loop rather than by recursion so that a long run of
 * empty source lines cannot grow the call stack.
 */
static char _GetNextSourceChar(E00ReadPtr psInfo)
{
    while (!psInfo->bEOF)
    {
        if (psInfo->szInBuf[psInfo->iInBufPtr] != '\0')
            return psInfo->szInBuf[psInfo->iInBufPtr++];

        /* Current line fully consumed (or empty): fetch the next one and
         * try again; this sets bEOF when the source runs out.
         */
        _ReadNextSourceLine(psInfo);
    }

    return '\0';
}
416
417 /**********************************************************************
418 * _UngetSourceChar()
419 *
420 * Reverse the effect of the previous call to _GetNextSourceChar() by
421 * moving the input buffer pointer back 1 character.
422 *
423 * This function can be called only once per call to _GetNextSourceChar()
424 * (i.e. you cannot unget more than one character) otherwise the pointer
425 * could move before the beginning of the input buffer.
426 **********************************************************************/
/* Push back the character most recently returned by _GetNextSourceChar()
 * by stepping the input buffer index back by one.
 *
 * When the index is already at the start of the buffer nothing is moved
 * and a CPLE_AssertionFailed error is reported instead.
 */
static void _UngetSourceChar(E00ReadPtr psInfo)
{
    if (psInfo->iInBufPtr <= 0)
    {
        /* Only reachable if this function is called twice in a row,
         * which should never happen.
         */
        CPLError(CE_Failure, CPLE_AssertionFailed,
                 "UNEXPECTED INTERNAL ERROR: _UngetSourceChar() "
                 "failed while reading line %d.", psInfo->nInputLineNo);
        return;
    }

    psInfo->iInBufPtr--;
}
441
442 /**********************************************************************
443 * _UncompressNextLine()
444 *
445 * Uncompress one line of input and return a reference to an internal
446 * buffer containing the uncompressed output.
447 **********************************************************************/
/* Uncompress one line of input into psInfo->szOutBuf.
 *
 * Source characters are consumed through _GetNextSourceChar() until the
 * "~}" end-of-line code, EOF, or an error.  Returns psInfo->szOutBuf,
 * always NUL-terminated.
 *
 * On an unsupported code, or when the decoded line would exceed 80
 * characters, an error is reported through CPLError() and psInfo->bEOF is
 * set to 1 so that reading stops; whatever was decoded so far is returned.
 *
 * Nothing is ever written beyond index 80 of szOutBuf (plus the
 * terminator at index 81 at most), so corrupt input cannot overrun the
 * buffer.  NOTE(review): this assumes szOutBuf holds at least 82 bytes,
 * which the 80-character check already relied on - confirm in e00compr.h.
 */
static const char *_UncompressNextLine(E00ReadPtr psInfo)
{
    const int nMaxLineLen = 80; /* E00 lines are never longer than this */
    char c;
    int bEOL = 0;               /* Set to 1 when End of Line reached */
    int bOverflow = 0;          /* Set to 1 when output had to be cut */
    int iOutBufPtr = 0, i, n;
    int iDecimalPoint, bOddNumDigits, iCurDigit;
    int iNumStart, nNeeded;
    const char *pszExp;
    int bPreviousCodeWasNumeric = 0;

    while (!bEOL && (c = _GetNextSourceChar(psInfo)) != '\0')
    {
        if (c != '~')
        {
            /* Normal character... just copy it
             */
            psInfo->szOutBuf[iOutBufPtr++] = c;
            bPreviousCodeWasNumeric = 0;
        }
        else /* c == '~' */
        {
            /* ========================================================
             * Found an encoded sequence.
             * =======================================================*/
            c = _GetNextSourceChar(psInfo);

            /* --------------------------------------------------------
             * Compression level 1: only spaces, '~' and '\n' are encoded
             * -------------------------------------------------------*/
            if (c == ' ')
            {
                /* "~ " followed by number of spaces.  The run is cut as
                 * soon as it passes the 80-character limit; the length
                 * check at the bottom of the loop then reports the error.
                 */
                c = _GetNextSourceChar(psInfo);
                n = c - ' ';
                for (i = 0; i < n && iOutBufPtr <= nMaxLineLen; i++)
                    psInfo->szOutBuf[iOutBufPtr++] = ' ';
                bPreviousCodeWasNumeric = 0;
            }
            else if (c == '}')
            {
                /* "~}" == '\n'
                 */
                bEOL = 1;
                bPreviousCodeWasNumeric = 0;
            }
            else if (bPreviousCodeWasNumeric)
            {
                /* After a numeric code the only valid sequences are "~ "
                 * and "~}".  Ending up here means the '~' was not a code:
                 * it only marked the end of a number that was not followed
                 * by any space.  Ignore the '~' and output the character
                 * that follows it.
                 */
                psInfo->szOutBuf[iOutBufPtr++] = c;
                bPreviousCodeWasNumeric = 0;
            }
            else if (c == '~' || c == '-')
            {
                /* "~~" and "~-" are simple escape sequences for '~' and '-'
                 */
                psInfo->szOutBuf[iOutBufPtr++] = c;
            }
            /* --------------------------------------------------------
             * Compression level 2: numeric values are encoded.
             *
             * All codes for this level are in the form "~ c0 c1 c2 ... cn"
             * where:
             *
             *  ~            marks the beginning of a new code sequence
             *
             *  c0           is a single character code defining the format
             *               of the number (decimal position, exponent,
             *               and even or odd number of digits)
             *
             *  c1 c2 ... cn each of these characters represent a pair of
             *               digits of the encoded value with '!' == 00
             *               values 92..99 are encoded on 2 chars that
             *               must be added to each other
             *               (i.e. 92 == }!, 93 == }", ...)
             *
             * The sequence ends with a ' ' or a '~' character
             * -------------------------------------------------------*/
            else if (c >= '!' && c <= 'z')
            {
                /* The format code defines 3 characteristics of the number:
                 *  - Presence of a decimal point and its position
                 *  - Presence of an exponent, and its sign
                 *  - Odd or even number of digits
                 */
                n = c - '!';
                iDecimalPoint = n % 15; /* 0 = no decimal point */
                bOddNumDigits = n / 45; /* 0 = even num.digits, 1 = odd */
                n = n / 15;
                if (n % 3 == 1)
                    pszExp = "E+";
                else if (n % 3 == 2)
                    pszExp = "E-";
                else
                    pszExp = NULL;

                /* Decode the c1 c2 ... cn value and apply the format.
                 * Read characters until we encounter a ' ' or a '~'
                 */
                iCurDigit = 0;
                iNumStart = iOutBufPtr;
                while ((c = _GetNextSourceChar(psInfo)) != '\0' &&
                       c != ' ' && c != '~')
                {
                    n = c - '!';
                    if (n == 92 && (c = _GetNextSourceChar(psInfo)) != '\0')
                        n += c - '!';

                    /* Each pair emits two digits, plus the decimal point
                     * when it falls right after one of them.  Stop before
                     * anything would land beyond index 80.
                     */
                    nNeeded = 2;
                    if (iDecimalPoint == iCurDigit + 1 ||
                        iDecimalPoint == iCurDigit + 2)
                        nNeeded = 3;
                    if (iOutBufPtr + nNeeded > nMaxLineLen + 1)
                    {
                        bOverflow = 1;
                        break;
                    }

                    psInfo->szOutBuf[iOutBufPtr++] = (char)('0' + n / 10);

                    if (++iCurDigit == iDecimalPoint)
                        psInfo->szOutBuf[iOutBufPtr++] = '.';

                    psInfo->szOutBuf[iOutBufPtr++] = (char)('0' + n % 10);

                    if (++iCurDigit == iDecimalPoint)
                        psInfo->szOutBuf[iOutBufPtr++] = '.';
                }

                if (!bOverflow)
                {
                    if (c == '~' || c == ' ')
                    {
                        /* The terminator is not part of the number: put
                         * it back so the main loop handles it.
                         */
                        bPreviousCodeWasNumeric = 1;
                        _UngetSourceChar(psInfo);
                    }

                    /* If odd number of digits, then flush the last one.
                     * Only characters produced by this number may be
                     * dropped, so the index never moves before its start.
                     */
                    if (bOddNumDigits && iOutBufPtr > iNumStart)
                        iOutBufPtr--;

                    /* Insert the exponent string before the 2 last digits
                     * (we assume the exponent string is 2 chars. long).
                     * This needs at least two characters from this number
                     * and room for two more within the line limit.
                     */
                    if (pszExp != NULL && iOutBufPtr - iNumStart >= 2)
                    {
                        if (iOutBufPtr + 2 > nMaxLineLen)
                        {
                            bOverflow = 1;
                        }
                        else
                        {
                            for (i = 0; i < 2; i++)
                            {
                                psInfo->szOutBuf[iOutBufPtr] =
                                    psInfo->szOutBuf[iOutBufPtr - 2];
                                psInfo->szOutBuf[iOutBufPtr - 2] = pszExp[i];
                                iOutBufPtr++;
                            }
                        }
                    }
                }
            }
            else
            {
                /* Unsupported code sequence... this is a possibility
                 * given the fact that this library was written by
                 * reverse-engineering the format!
                 *
                 * Send an error to the user and abort.
                 *
                 * If this error ever happens, and you are convinced that
                 * the input file is not corrupted, then please report it
                 * to dmorissette@mapgears.com, quoting the section of the
                 * input file that produced it.
                 */
                CPLError(CE_Failure, CPLE_NotSupported,
                         "Unexpected code \"~%c\" encountered in line %d.",
                         c, psInfo->nInputLineNo);

                /* Force the program to abort by simulating a EOF
                 */
                psInfo->bEOF = 1;
                bEOL = 1;
            }

        }/* if c == '~' */

        /* E00 lines should NEVER be longer than 80 chars. If we passed
         * that limit (or had to cut the output to stay within it), then
         * the input file is likely corrupt.
         */
        if (bOverflow || iOutBufPtr > nMaxLineLen)
        {
            CPLError(CE_Failure, CPLE_FileIO,
                     "Uncompressed line longer than 80 chars. "
                     "Input file possibly corrupt around line %d.",
                     psInfo->nInputLineNo);
            /* Force the program to abort by simulating a EOF
             */
            psInfo->bEOF = 1;
            bEOL = 1;
        }

    }/* while !EOL */

    psInfo->szOutBuf[iOutBufPtr] = '\0';

    return psInfo->szOutBuf;
}
645
646
647