1 /******************************************************************************
2  *
3  * Project:  Common Portability Library
4  * Purpose:  Implementation of CPLKeywordParser - a class for parsing
5  *           the keyword format used for files like QuickBird .RPB files.
6  *           This is a slight variation on the NASAKeywordParser used for
7  *           the PDS/ISIS2/ISIS3 formats.
 * Author:   Frank Warmerdam <warmerdam@pobox.com>
9  *
10  ******************************************************************************
11  * Copyright (c) 2008, Frank Warmerdam <warmerdam@pobox.com>
12  * Copyright (c) 2009-2010, Even Rouault <even dot rouault at spatialys.com>
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a
15  * copy of this software and associated documentation files (the "Software"),
16  * to deal in the Software without restriction, including without limitation
17  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
18  * and/or sell copies of the Software, and to permit persons to whom the
19  * Software is furnished to do so, subject to the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included
22  * in all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30  * DEALINGS IN THE SOFTWARE.
31  ****************************************************************************/
32 
33 //! @cond Doxygen_Suppress
34 
35 #include "cpl_port.h"
36 #include "cplkeywordparser.h"
37 
38 #include <cctype>
39 #include <cstring>
40 #include <string>
41 
42 #include "cpl_string.h"
43 #include "cpl_vsi.h"
44 
45 CPL_CVSID("$Id: cplkeywordparser.cpp 9bd3fecec5df135b1f12bbf195e41f35d5c0fe5a 2021-07-01 16:32:30 +0200 Even Rouault $")
46 
47 /************************************************************************/
48 /* ==================================================================== */
49 /*                          CPLKeywordParser                           */
50 /* ==================================================================== */
51 /************************************************************************/
52 
53 /************************************************************************/
54 /*                         CPLKeywordParser()                          */
55 /************************************************************************/
56 
// Defaulted constructor: there is no work to do here, any member
// initialisation comes from the class definition (not visible in this file).
CPLKeywordParser::CPLKeywordParser() = default;
58 
59 /************************************************************************/
60 /*                        ~CPLKeywordParser()                          */
61 /************************************************************************/
62 
~CPLKeywordParser()63 CPLKeywordParser::~CPLKeywordParser()
64 
65 {
66     CSLDestroy( papszKeywordList );
67     papszKeywordList = nullptr;
68 }
69 
70 /************************************************************************/
71 /*                               Ingest()                               */
72 /************************************************************************/
73 
Ingest(VSILFILE * fp)74 int CPLKeywordParser::Ingest( VSILFILE *fp )
75 
76 {
77 /* -------------------------------------------------------------------- */
78 /*      Read in buffer till we find END all on its own line.            */
79 /* -------------------------------------------------------------------- */
80     for( ; true; )
81     {
82         char szChunk[513] = {};
83         const size_t nBytesRead = VSIFReadL( szChunk, 1, 512, fp );
84 
85         szChunk[nBytesRead] = '\0';
86         osHeaderText += szChunk;
87 
88         if( nBytesRead < 512 )
89             break;
90 
91         const char *pszCheck = nullptr;
92         if( osHeaderText.size() > 520 )
93             pszCheck = osHeaderText.c_str() + (osHeaderText.size() - 520);
94         else
95             pszCheck = szChunk;
96 
97         if( strstr(pszCheck, "\r\nEND;\r\n") != nullptr
98             || strstr(pszCheck, "\nEND;\n") != nullptr )
99             break;
100     }
101 
102     pszHeaderNext = osHeaderText.c_str();
103 
104 /* -------------------------------------------------------------------- */
105 /*      Process name/value pairs, keeping track of a "path stack".      */
106 /* -------------------------------------------------------------------- */
107     return ReadGroup( "", 0 );
108 }
109 
110 /************************************************************************/
111 /*                             ReadGroup()                              */
112 /************************************************************************/
113 
ReadGroup(const char * pszPathPrefix,int nRecLevel)114 bool CPLKeywordParser::ReadGroup( const char *pszPathPrefix, int nRecLevel )
115 
116 {
117     CPLString osName;
118     CPLString osValue;
119 
120     // Arbitrary threshold to avoid stack overflow
121     if( nRecLevel == 100 )
122         return false;
123 
124     for( ; true; )
125     {
126         if( !ReadPair( osName, osValue ) )
127             return false;
128 
129         if( EQUAL(osName, "BEGIN_GROUP") || EQUAL(osName, "GROUP") )
130         {
131             if( !ReadGroup((CPLString(pszPathPrefix) + osValue + ".").c_str(),
132                            nRecLevel + 1) )
133                 return false;
134         }
135         else if( STARTS_WITH_CI(osName, "END") )
136         {
137             return true;
138         }
139         else
140         {
141             osName = pszPathPrefix + osName;
142             papszKeywordList = CSLSetNameValue( papszKeywordList,
143                                                 osName, osValue );
144         }
145     }
146 }
147 
148 /************************************************************************/
149 /*                              ReadPair()                              */
150 /*                                                                      */
151 /*      Read a name/value pair from the input stream.  Strip off        */
152 /*      white space, ignore comments, split on '='.                     */
153 /************************************************************************/
154 
ReadPair(CPLString & osName,CPLString & osValue)155 bool CPLKeywordParser::ReadPair( CPLString &osName, CPLString &osValue )
156 
157 {
158     osName = "";
159     osValue = "";
160 
161     if( !ReadWord( osName ) )
162         return false;
163 
164     SkipWhite();
165 
166     if( EQUAL(osName, "END") )
167         return TRUE;
168 
169     if( *pszHeaderNext != '=' )
170     {
171         // ISIS3 does not have anything after the end group/object keyword.
172         return EQUAL(osName, "End_Group") || EQUAL(osName, "End_Object");
173     }
174 
175     pszHeaderNext++;
176 
177     SkipWhite();
178 
179     osValue = "";
180 
181     // Handle value lists like:     Name   = (Red, Red)
182     // or list of lists like: TLCList = ( (0, 0.000000), (8299, 4.811014) );
183     if( *pszHeaderNext == '(' )
184     {
185         CPLString osWord;
186         int nDepth = 0;
187         const char* pszLastPos = pszHeaderNext;
188 
189         while( ReadWord( osWord ) && pszLastPos != pszHeaderNext)
190         {
191             SkipWhite();
192             pszLastPos = pszHeaderNext;
193 
194             osValue += osWord;
195             const char* pszIter = osWord.c_str();
196             bool bInQuote = false;
197             while( *pszIter != '\0' )
198             {
199                 if( *pszIter == '"' )
200                     bInQuote = !bInQuote;
201                 else if( !bInQuote )
202                 {
203                     if( *pszIter == '(' )
204                         nDepth++;
205                     else if( *pszIter == ')' )
206                     {
207                         nDepth--;
208                         if( nDepth == 0 )
209                             break;
210                     }
211                 }
212                 pszIter++;
213             }
214             if( *pszIter == ')' && nDepth == 0 )
215                 break;
216         }
217     }
218 
219     else // Handle more normal "single word" values.
220     {
221         // Special case to handle non-conformant IMD files generated by
222         // previous GDAL version where we omit to surround values that have
223         // spaces with double quotes.
224         // So we use a heuristics to handle things like:
225         //       key = value with spaces without single or double quotes at beginning of value;[\r]\n
226         const char* pszNextLF = strchr(pszHeaderNext, '\n');
227         if( pszNextLF )
228         {
229             std::string osTxt(pszHeaderNext, pszNextLF - pszHeaderNext);
230             const auto nCRPos = osTxt.find('\r');
231             const auto nSemiColonPos = osTxt.find(';');
232             const auto nQuotePos = osTxt.find('\'');
233             const auto nDoubleQuotePos = osTxt.find('"');
234             const auto nLTPos = osTxt.find('<');
235             if( nSemiColonPos != std::string::npos &&
236                 (nCRPos == std::string::npos || (nCRPos + 1 == osTxt.size())) &&
237                 ((nCRPos != std::string::npos && (nSemiColonPos + 1 == nCRPos)) ||
238                  (nCRPos == std::string::npos && (nSemiColonPos + 1 == osTxt.size()))) &&
239                 (nQuotePos == std::string::npos || nQuotePos != 0) &&
240                 (nDoubleQuotePos == std::string::npos || nDoubleQuotePos != 0) &&
241                 (nLTPos == std::string::npos || osTxt.find('>') == std::string::npos) )
242             {
243                 pszHeaderNext = pszNextLF;
244                 osTxt.resize(nSemiColonPos);
245                 osValue = osTxt;
246                 while( !osValue.empty() && osValue.back() == ' ' )
247                     osValue.resize(osValue.size() - 1);
248                 return true;
249             }
250         }
251 
252         if( !ReadWord( osValue ) )
253             return false;
254     }
255 
256     SkipWhite();
257 
258     // No units keyword?
259     if( *pszHeaderNext != '<' )
260         return true;
261 
262     // Append units keyword.  For lines that like like this:
263     //  MAP_RESOLUTION               = 4.0 <PIXEL/DEGREE>
264 
265     CPLString osWord;
266 
267     osValue += " ";
268 
269     while( ReadWord( osWord ) )
270     {
271         SkipWhite();
272 
273         osValue += osWord;
274         if( osWord.back() == '>' )
275             break;
276     }
277 
278     return true;
279 }
280 
281 /************************************************************************/
282 /*                              ReadWord()                              */
283 /************************************************************************/
284 
ReadWord(CPLString & osWord)285 bool CPLKeywordParser::ReadWord( CPLString &osWord )
286 
287 {
288     osWord = "";
289 
290     SkipWhite();
291 
292     if( *pszHeaderNext == '\0' || *pszHeaderNext == '=' )
293         return false;
294 
295     while( *pszHeaderNext != '\0'
296            && *pszHeaderNext != '='
297            && *pszHeaderNext != ';'
298            && !isspace(static_cast<unsigned char>(*pszHeaderNext)) )
299     {
300         if( *pszHeaderNext == '"' )
301         {
302             osWord += *(pszHeaderNext++);
303             while( *pszHeaderNext != '"' )
304             {
305                 if( *pszHeaderNext == '\0' )
306                     return false;
307 
308                 osWord += *(pszHeaderNext++);
309             }
310             osWord += *(pszHeaderNext++);
311         }
312         else if( *pszHeaderNext == '\'' )
313         {
314             osWord += *(pszHeaderNext++);
315             while( *pszHeaderNext != '\'' )
316             {
317                 if( *pszHeaderNext == '\0' )
318                     return false;
319 
320                 osWord += *(pszHeaderNext++);
321             }
322             osWord += *(pszHeaderNext++);
323         }
324         else
325         {
326             osWord += *pszHeaderNext;
327             pszHeaderNext++;
328         }
329     }
330 
331     if( *pszHeaderNext == ';' )
332         pszHeaderNext++;
333 
334     return true;
335 }
336 
337 /************************************************************************/
338 /*                             SkipWhite()                              */
339 /************************************************************************/
340 
SkipWhite()341 void CPLKeywordParser::SkipWhite()
342 
343 {
344     for( ; true; )
345     {
346         // Skip white space (newline, space, tab, etc )
347         if( isspace( static_cast<unsigned char>(*pszHeaderNext) ) )
348         {
349             pszHeaderNext++;
350             continue;
351         }
352 
353         // Skip C style comments
354         if( *pszHeaderNext == '/' && pszHeaderNext[1] == '*' )
355         {
356             pszHeaderNext += 2;
357 
358             while( *pszHeaderNext != '\0'
359                    && (*pszHeaderNext != '*'
360                        || pszHeaderNext[1] != '/' ) )
361             {
362                 pszHeaderNext++;
363             }
364             if( *pszHeaderNext == '\0' )
365                 break;
366 
367             pszHeaderNext += 2;
368             continue;
369         }
370 
371         // Skip # style comments
372         if( *pszHeaderNext == '#' )
373         {
374             pszHeaderNext += 1;
375 
376             // consume till end of line.
377             while( *pszHeaderNext != '\0'
378                    && *pszHeaderNext != 10
379                    && *pszHeaderNext != 13 )
380             {
381                 pszHeaderNext++;
382             }
383             continue;
384         }
385 
386         // not white space, return.
387         return;
388     }
389 }
390 
391 /************************************************************************/
392 /*                             GetKeyword()                             */
393 /************************************************************************/
394 
GetKeyword(const char * pszPath,const char * pszDefault)395 const char *CPLKeywordParser::GetKeyword( const char *pszPath,
396                                             const char *pszDefault )
397 
398 {
399     const char *pszResult = CSLFetchNameValue( papszKeywordList, pszPath );
400     if( pszResult == nullptr )
401         return pszDefault;
402 
403     return pszResult;
404 }
405 
406 //! @endcond
407