1 /**********************************************************************
2  * $Id: e00read.c,v 1.10 2009-02-24 20:03:50 aboudreault Exp $
3  *
4  * Name:     e00read.c
5  * Project:  Compressed E00 Read/Write library
6  * Language: ANSI C
7  * Purpose:  Functions to read Compressed E00 files and return a stream
8  *           of uncompressed lines.
9  * Author:   Daniel Morissette, dmorissette@mapgears.com
10  *
11  * $Log: e00read.c,v $
12  * Revision 1.10  2009-02-24 20:03:50  aboudreault
13  * Added a short manual pages (#1875)
14  * Updated documentation and code examples (#247)
15  *
16  * Revision 1.9  2005-09-17 14:22:05  daniel
17  * Switch to MIT license, update refs to website and email address, and
18  * prepare for 1.0.0 release.
19  *
20  * Revision 1.8  1999/02/25 18:45:56  daniel
21  * Now use CPL for Error handling, Memory allocation, and File access
22  *
23  * Revision 1.7  1999/01/08 17:39:08  daniel
24  * Added E00ReadCallbackOpen()
25  *
26  * Revision 1.6  1998/11/13 16:34:08  daniel
27  * Fixed '\r' problem when reading E00 files from a PC under Unix
28  *
29  * Revision 1.5  1998/11/13 15:48:08  daniel
30  * Simplified the decoding of the compression codes for numbers
31  * (use a logical rule instead of going case by case)
32  *
33  * Revision 1.4  1998/11/02 18:34:29  daniel
34  * Added E00ErrorReset() calls.  Replace "EXP  1" by "EXP  0" on read.
35  *
36  * Revision 1.1  1998/10/29 13:26:00  daniel
37  * Initial revision
38  *
39  **********************************************************************
40  * Copyright (c) 1998-2005, Daniel Morissette
41  *
42  * Permission is hereby granted, free of charge, to any person obtaining a
43  * copy of this software and associated documentation files (the "Software"),
44  * to deal in the Software without restriction, including without limitation
45  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
46  * and/or sell copies of the Software, and to permit persons to whom the
47  * Software is furnished to do so, subject to the following conditions:
48  *
49  * The above copyright notice and this permission notice shall be included
50  * in all copies or substantial portions of the Software.
51  *
52  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
53  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
54  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
55  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
56  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
57  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
58  * DEALINGS IN THE SOFTWARE.
59  *
60  **********************************************************************/
61 
62 #include <stdlib.h>
63 #include <string.h>
64 #include <ctype.h>
65 #include <errno.h>
66 
67 #include "e00compr.h"
68 
69 static void _ReadNextSourceLine(E00ReadPtr psInfo);
70 static const char *_UncompressNextLine(E00ReadPtr psInfo);
71 
72 /**********************************************************************
73  *                          _E00ReadTestOpen()
74  *
75  * Given a pre-initialized E00ReadPtr, this function will make sure
76  * that the file is really a E00 file, and also establish if it is
77  * compressed or not... setting the structure members by the same way.
78  *
79  * Returns NULL (and destroys the E00ReadPtr) if the file does not
80  * appear to be a valid E00 file.
81  **********************************************************************/
_E00ReadTestOpen(E00ReadPtr psInfo)82 static E00ReadPtr  _E00ReadTestOpen(E00ReadPtr psInfo)
83 {
84 
85     /* Check that the file is in E00 format.
86      */
87     _ReadNextSourceLine(psInfo);
88     if (!psInfo->bEOF && strncmp(psInfo->szInBuf, "EXP ", 4) == 0)
89     {
90         /* We should be in presence of a valid E00 file...
91          * Is the file compressed or not?
92          *
93          * Note: we cannot really rely on the number that follows the EXP to
94          * establish if the file is compressed since we sometimes encounter
95          * uncompressed files that start with a "EXP 1" line!!!
96          *
97          * The best test is to read the first non-empty line: if the file is
98          * compressed, the first line of data should be 79 or 80 characters
99          * long and contain several '~' characters.
100          */
101         do
102         {
103             _ReadNextSourceLine(psInfo);
104         }while(!psInfo->bEOF &&
105                (psInfo->szInBuf[0] == '\0' || isspace(psInfo->szInBuf[0])) );
106 
107          if (!psInfo->bEOF &&
108              (strlen(psInfo->szInBuf)==79 || strlen(psInfo->szInBuf)==80) &&
109              strchr(psInfo->szInBuf, '~') != NULL )
110              psInfo->bIsCompressed = 1;
111 
112          /* Move the Read ptr ready to read at the beginning of the file
113           */
114          E00ReadRewind(psInfo);
115     }
116     else
117     {
118         CPLFree(psInfo);
119         psInfo = NULL;
120     }
121 
122     return psInfo;
123 }
124 
125 /**********************************************************************
126  *                          E00ReadOpen()
127  *
128  * Try to open a E00 file given its filename and return a E00ReadPtr handle.
129  *
130  * Returns NULL if the file could not be opened or if it does not
131  * appear to be a valid E00 file.
132  **********************************************************************/
E00ReadOpen(const char * pszFname)133 E00ReadPtr  E00ReadOpen(const char *pszFname)
134 {
135     E00ReadPtr  psInfo = NULL;
136     FILE        *fp;
137 
138     CPLErrorReset();
139 
140     /* Open the file
141      */
142     fp = VSIFOpen(pszFname, "rt");
143     if (fp == NULL)
144     {
145         CPLError(CE_Failure, CPLE_OpenFailed,
146                  "Failed to open %s: %s", pszFname, strerror(errno));
147         return NULL;
148     }
149 
150     /* File was succesfully opened, allocate and initialize a
151      * E00ReadPtr handle and check that the file is valid.
152      */
153     psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
154 
155     psInfo->fp = fp;
156 
157     psInfo = _E00ReadTestOpen(psInfo);
158 
159     if (psInfo == NULL)
160     {
161         CPLError(CE_Failure, CPLE_OpenFailed,
162                  "%s is not a valid E00 file.", pszFname);
163     }
164 
165     return psInfo;
166 }
167 
168 /**********************************************************************
169  *                          E00ReadCallbackOpen()
170  *
171  * This is an alternative to E00ReadOpen() for cases where you want to
172  * do all the file management yourself.  You open/close the file yourself
173  * and provide 2 callback functions: to read from the file and rewind the
174  * file pointer.  pRefData is your handle on the physical file and can
175  * be whatever you want... it is not used by the library, it will be
176  * passed directly to your 2 callback functions when they are called.
177  *
178  * The callback functions must have the following C prototype:
179  *
180  *   const char *myReadNextLine(void *pRefData);
181  *   void        myReadRewind(void *pRefData);
182  *
183  *   myReadNextLine() should return a reference to its own internal
184  *   buffer, or NULL if an error happens or EOF is reached.
185  *
186  * E00ReadCallbackOpen() returns a E00ReadPtr handle or NULL if the file
187  * does not appear to be a valid E00 file.
188  **********************************************************************/
E00ReadCallbackOpen(void * pRefData,const char * (* pfnReadNextLine)(void *),void (* pfnReadRewind)(void *))189 E00ReadPtr  E00ReadCallbackOpen(void *pRefData,
190                                 const char * (*pfnReadNextLine)(void *),
191                                 void (*pfnReadRewind)(void *))
192 {
193     E00ReadPtr  psInfo = NULL;
194 
195     CPLErrorReset();
196 
197     /* Make sure we received valid function pointers
198      */
199     if (pfnReadNextLine == NULL || pfnReadRewind == NULL)
200     {
201         CPLError(CE_Failure, CPLE_IllegalArg,
202                  "Invalid function pointers!");
203         return NULL;
204     }
205 
206     /* Allocate and initialize a
207      * E00ReadPtr handle and check that the file is valid.
208      */
209     psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
210 
211     psInfo->pRefData = pRefData;
212     psInfo->pfnReadNextLine = pfnReadNextLine;
213     psInfo->pfnReadRewind = pfnReadRewind;
214 
215     psInfo = _E00ReadTestOpen(psInfo);
216 
217     if (psInfo == NULL)
218     {
219         CPLError(CE_Failure, CPLE_OpenFailed,
220                  "This is not a valid E00 file.");
221     }
222 
223     return psInfo;
224 }
225 
226 /**********************************************************************
227  *                          E00ReadClose()
228  *
229  * Close input file and release any memory used by the E00ReadPtr.
230  **********************************************************************/
E00ReadClose(E00ReadPtr psInfo)231 void    E00ReadClose(E00ReadPtr psInfo)
232 {
233     CPLErrorReset();
234 
235     if (psInfo)
236     {
237         if (psInfo->fp)
238             VSIFClose(psInfo->fp);
239         CPLFree(psInfo);
240     }
241 }
242 
243 /**********************************************************************
244  *                          E00ReadRewind()
245  *
246  * Rewind the E00ReadPtr.  Allows to start another read pass on the
247  * input file.
248  **********************************************************************/
E00ReadRewind(E00ReadPtr psInfo)249 void    E00ReadRewind(E00ReadPtr psInfo)
250 {
251     CPLErrorReset();
252 
253     psInfo->szInBuf[0] = psInfo->szOutBuf[0] = '\0';
254     psInfo->iInBufPtr = 0;
255 
256     psInfo->nInputLineNo = 0;
257 
258     if (psInfo->pfnReadRewind == NULL)
259         VSIRewind(psInfo->fp);
260     else
261         psInfo->pfnReadRewind(psInfo->pRefData);
262 
263     psInfo->bEOF = 0;
264 }
265 
266 /**********************************************************************
267  *                          E00ReadNextLine()
268  *
269  * Return the next line of input from the E00 file or NULL if we reached EOF.
270  *
271  * Returns a reference to an internal buffer whose contents will be valid
272  * only until the next call to this function.
273  **********************************************************************/
E00ReadNextLine(E00ReadPtr psInfo)274 const char *E00ReadNextLine(E00ReadPtr psInfo)
275 {
276     const char *pszLine = NULL;
277     char *pszPtr;
278 
279     CPLErrorReset();
280 
281     if (psInfo && !psInfo->bEOF)
282     {
283         if (!psInfo->bIsCompressed)
284         {
285             /* Uncompressed file... return line directly.
286              */
287             _ReadNextSourceLine(psInfo);
288             pszLine = psInfo->szInBuf;
289         }
290         else if (psInfo->bIsCompressed && psInfo->nInputLineNo == 0)
291         {
292             /* Header line in a compressed file... return line
293              * after replacing "EXP  1" with "EXP  0".  E00ReadOpen()
294              * has already verified that this line starts with "EXP "
295              */
296             _ReadNextSourceLine(psInfo);
297             if ( (pszPtr = strstr(psInfo->szInBuf, " 1")) != NULL)
298                 pszPtr[1] = '0';
299             pszLine = psInfo->szInBuf;
300         }
301         else
302         {
303             if (psInfo->nInputLineNo == 1)
304             {
305                 /* We just read the header line... reload the input buffer
306                  */
307                 _ReadNextSourceLine(psInfo);
308             }
309 
310             /* Uncompress the next line of input and return it
311              */
312             pszLine = _UncompressNextLine(psInfo);
313         }
314 
315         /* If we just reached EOF then make sure we don't add an extra
316          * empty line at the end of the uncompressed oputput.
317          */
318         if (psInfo->bEOF && strlen(pszLine) == 0)
319             pszLine = NULL;
320     }
321 
322     return pszLine;
323 }
324 
325 /**********************************************************************
326  *                          _ReadNextSourceLine()
327  *
328  * Loads the next line from the source file in psInfo.
329  *
330  * psInfo->bEOF should be checked after this call.
331  **********************************************************************/
_ReadNextSourceLine(E00ReadPtr psInfo)332 static void _ReadNextSourceLine(E00ReadPtr psInfo)
333 {
334     if (!psInfo->bEOF)
335     {
336         psInfo->iInBufPtr = 0;
337         psInfo->szInBuf[0] = '\0';
338 
339         /* Read either using fgets() or psInfo->pfnReadNextLine()
340          * depending on the way the file was opened...
341          */
342         if (psInfo->pfnReadNextLine == NULL)
343         {
344             if (VSIFGets(psInfo->szInBuf,E00_READ_BUF_SIZE,psInfo->fp) == NULL)
345             {
346                 /* We reached EOF
347                  */
348                 psInfo->bEOF = 1;
349             }
350         }
351         else
352         {
353             const char *pszLine;
354             pszLine = psInfo->pfnReadNextLine(psInfo->pRefData);
355             if (pszLine)
356             {
357                 strncpy(psInfo->szInBuf, pszLine, E00_READ_BUF_SIZE);
358             }
359             else
360             {
361                 /* We reached EOF
362                  */
363                 psInfo->bEOF = 1;
364             }
365         }
366 
367         if (!psInfo->bEOF)
368         {
369             /* A new line was succesfully read.  Remove trailing '\n' if any.
370              * (Note: For Unix systems, we also have to check for '\r')
371              */
372             int nLen;
373             nLen = strlen(psInfo->szInBuf);
374             while(nLen > 0 && (psInfo->szInBuf[nLen-1] == '\n' ||
375                                psInfo->szInBuf[nLen-1] == '\r'   ) )
376             {
377                 nLen--;
378                 psInfo->szInBuf[nLen] = '\0';
379             }
380 
381             psInfo->nInputLineNo++;
382         }
383     }
384 }
385 
386 
387 /**********************************************************************
388  *                          _GetNextSourceChar()
389  *
390  * Returns the next char from the source file input buffer... and
391  * reload the input buffer when necessary... this function makes the
392  * whole input file appear as one huge null-terminated string with
393  * no line delimiters.
394  *
395  * Will return '\0' when EOF is reached.
396  **********************************************************************/
_GetNextSourceChar(E00ReadPtr psInfo)397 static char _GetNextSourceChar(E00ReadPtr psInfo)
398 {
399     char c = '\0';
400 
401     if (!psInfo->bEOF)
402     {
403         if (psInfo->szInBuf[psInfo->iInBufPtr] == '\0')
404         {
405             _ReadNextSourceLine(psInfo);
406             c = _GetNextSourceChar(psInfo);
407         }
408         else
409         {
410             c = psInfo->szInBuf[psInfo->iInBufPtr++];
411         }
412     }
413 
414     return c;
415 }
416 
417 /**********************************************************************
418  *                          _UngetSourceChar()
419  *
420  * Reverse the effect of the previous call to _GetNextSourceChar() by
421  * moving the input buffer pointer back 1 character.
422  *
423  * This function can be called only once per call to _GetNextSourceChar()
424  * (i.e. you cannot unget more than one character) otherwise the pointer
425  * could move before the beginning of the input buffer.
426  **********************************************************************/
_UngetSourceChar(E00ReadPtr psInfo)427 static void _UngetSourceChar(E00ReadPtr psInfo)
428 {
429     if (psInfo->iInBufPtr > 0)
430         psInfo->iInBufPtr--;
431     else
432     {
433         /* This error can happen only if _UngetSourceChar() is called
434          * twice in a row (which should never happen!).
435          */
436         CPLError(CE_Failure, CPLE_AssertionFailed,
437                  "UNEXPECTED INTERNAL ERROR: _UngetSourceChar() "
438                       "failed while reading line %d.", psInfo->nInputLineNo);
439     }
440 }
441 
442 /**********************************************************************
443  *                          _UncompressNextLine()
444  *
445  * Uncompress one line of input and return a reference to an internal
446  * buffer containing the uncompressed output.
447  **********************************************************************/
_UncompressNextLine(E00ReadPtr psInfo)448 static const char *_UncompressNextLine(E00ReadPtr psInfo)
449 {
450     char    c;
451     int     bEOL = 0;   /* Set to 1 when End of Line reached */
452     int     iOutBufPtr = 0, i, n;
453     int     iDecimalPoint, bOddNumDigits, iCurDigit;
454     char    *pszExp;
455     int     bPreviousCodeWasNumeric = 0;
456 
457     while(!bEOL && (c=_GetNextSourceChar(psInfo)) != '\0')
458     {
459         if (c != '~')
460         {
461             /* Normal character... just copy it
462              */
463             psInfo->szOutBuf[iOutBufPtr++] = c;
464             bPreviousCodeWasNumeric = 0;
465         }
466         else /* c == '~' */
467         {
468             /* ========================================================
469              * Found an encoded sequence.
470              * =======================================================*/
471             c = _GetNextSourceChar(psInfo);
472 
473             /* --------------------------------------------------------
474              * Compression level 1: only spaces, '~' and '\n' are encoded
475              * -------------------------------------------------------*/
476             if (c == ' ')
477             {
478                 /* "~ " followed by number of spaces
479                  */
480                 c = _GetNextSourceChar(psInfo);
481                 n = c - ' ';
482                 for(i=0; i<n; i++)
483                     psInfo->szOutBuf[iOutBufPtr++] = ' ';
484                 bPreviousCodeWasNumeric = 0;
485             }
486             else if (c == '}')
487             {
488                 /* "~}" == '\n'
489                  */
490                 bEOL = 1;
491                 bPreviousCodeWasNumeric = 0;
492             }
493             else if (bPreviousCodeWasNumeric)
494             {
495                 /* If the previous code was numeric, then the only valid code
496                  * sequences are the ones above: "~ " and "~}".  If we end up
497                  * here, it is because the number was followed by a '~' but
498                  * this '~' was not a code, it only marked the end of a
499                  * number that was not followed by any space.
500                  *
501                  * We should simply ignore the '~' and return the character
502                  * that follows it directly.
503                  */
504                 psInfo->szOutBuf[iOutBufPtr++] = c;
505                 bPreviousCodeWasNumeric = 0;
506             }
507             else if (c == '~' || c == '-')
508             {
509                 /* "~~" and "~-" are simple escape sequences for '~' and '-'
510                  */
511                 psInfo->szOutBuf[iOutBufPtr++] = c;
512             }
513             /* --------------------------------------------------------
514              * Compression level 2: numeric values are encoded.
515              *
516              * All codes for this level are in the form "~ c0 c1 c2 ... cn"
517              * where:
518              *
519              *  ~             marks the beginning of a new code sequence
520              *
521              *  c0            is a single character code defining the format
522              *                of the number (decimal position, exponent,
523              *                and even or odd number of digits)
524              *
525              *  c1 c2 ... cn  each of these characters represent a pair of
526              *                digits of the encoded value with '!' == 00
527              *                values 92..99 are encoded on 2 chars that
528              *                must be added to each other
529              *                (i.e. 92 == }!, 93 == }", ...)
530              *
531              *  The sequence ends with a ' ' or a '~' character
532              * -------------------------------------------------------*/
533             else if (c >= '!' && c <= 'z')
534             {
535                 /* The format code defines 3 characteristics of the final number:
536                  * - Presence of a decimal point and its position
537                  * - Presence of an exponent, and its sign
538                  * - Odd or even number of digits
539                  */
540                 n = c - '!';
541                 iDecimalPoint = n % 15; /* 0 = no decimal point         */
542                 bOddNumDigits = n / 45; /* 0 = even num.digits, 1 = odd */
543                 n = n / 15;
544                 if ( n % 3 == 1 )
545                     pszExp = "E+";
546                 else if (n % 3 == 2 )
547                     pszExp = "E-";
548                 else
549                     pszExp = NULL;
550 
551                 /* Decode the c1 c2 ... cn value and apply the format.
552                  * Read characters until we encounter a ' ' or a '~'
553                  */
554                 iCurDigit = 0;
555                 while((c=_GetNextSourceChar(psInfo)) != '\0' &&
556                       c != ' ' && c != '~')
557                 {
558                     n = c - '!';
559                     if (n == 92 && (c=_GetNextSourceChar(psInfo)) != '\0')
560                         n += c - '!';
561 
562                     psInfo->szOutBuf[iOutBufPtr++] = '0' + n/10;
563 
564                     if (++iCurDigit == iDecimalPoint)
565                         psInfo->szOutBuf[iOutBufPtr++] = '.';
566 
567                     psInfo->szOutBuf[iOutBufPtr++] = '0' + n%10;
568 
569                     if (++iCurDigit == iDecimalPoint)
570                         psInfo->szOutBuf[iOutBufPtr++] = '.';
571                 }
572 
573                 if (c == '~' || c == ' ')
574                 {
575                     bPreviousCodeWasNumeric = 1;
576                     _UngetSourceChar(psInfo);
577                 }
578 
579                 /* If odd number of digits, then flush the last one
580                  */
581                 if (bOddNumDigits)
582                     iOutBufPtr--;
583 
584                 /* Insert the exponent string before the 2 last digits
585                  * (we assume the exponent string is 2 chars. long)
586                  */
587                 if (pszExp)
588                 {
589                     for(i=0; i<2;i++)
590                     {
591                         psInfo->szOutBuf[iOutBufPtr] =
592                                    psInfo->szOutBuf[iOutBufPtr-2];
593                         psInfo->szOutBuf[iOutBufPtr-2] = pszExp[i];
594                         iOutBufPtr++;
595                     }
596                 }
597             }
598             else
599             {
600                 /* Unsupported code sequence... this is a possibility
601                  * given the fact that this library was written by
602                  * reverse-engineering the format!
603                  *
604                  * Send an error to the user and abort.
605                  *
606                  * If this error ever happens, and you are convinced that
607                  * the input file is not corrupted, then please report it to
608                  * me at dmorissette@mapgears.com, quoting the section of the input
609                  * file that produced it, and I'll do my best to add support
610                  * for this code sequence.
611                  */
612                 CPLError(CE_Failure, CPLE_NotSupported,
613                          "Unexpected code \"~%c\" encountered in line %d.",
614                           c, psInfo->nInputLineNo);
615 
616                 /* Force the program to abort by simulating a EOF
617                  */
618                 psInfo->bEOF = 1;
619                 bEOL = 1;
620             }
621 
622         }/* if c == '~' */
623 
624         /* E00 lines should NEVER be longer than 80 chars.  if we passed
625          * that limit, then the input file is likely corrupt.
626          */
627          if (iOutBufPtr > 80)
628          {
629             CPLError(CE_Failure, CPLE_FileIO,
630                       "Uncompressed line longer than 80 chars. "
631                       "Input file possibly corrupt around line %d.",
632                       psInfo->nInputLineNo);
633             /* Force the program to abort by simulating a EOF
634              */
635             psInfo->bEOF = 1;
636             bEOL = 1;
637          }
638 
639     }/* while !EOL */
640 
641     psInfo->szOutBuf[iOutBufPtr++] = '\0';
642 
643     return psInfo->szOutBuf;
644 }
645 
646 
647