1 /******************************************************************************
2 *
3 * Project: Common Portability Library
4 * Purpose: Implementation of CPLKeywordParser - a class for parsing
5 * the keyword format used for files like QuickBird .RPB files.
6 * This is a slight variation on the NASAKeywordParser used for
7 * the PDS/ISIS2/ISIS3 formats.
8 * Author: Frank Warmerdam <warmerdam@pobox.com>
9 *
10 ******************************************************************************
11 * Copyright (c) 2008, Frank Warmerdam <warmerdam@pobox.com>
12 * Copyright (c) 2009-2010, Even Rouault <even dot rouault at spatialys.com>
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a
15 * copy of this software and associated documentation files (the "Software"),
16 * to deal in the Software without restriction, including without limitation
17 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
18 * and/or sell copies of the Software, and to permit persons to whom the
19 * Software is furnished to do so, subject to the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included
22 * in all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30 * DEALINGS IN THE SOFTWARE.
31 ****************************************************************************/
32
33 //! @cond Doxygen_Suppress
34
35 #include "cpl_port.h"
36 #include "cplkeywordparser.h"
37
38 #include <cctype>
39 #include <cstring>
40 #include <string>
41
42 #include "cpl_string.h"
43 #include "cpl_vsi.h"
44
45 CPL_CVSID("$Id: cplkeywordparser.cpp 9bd3fecec5df135b1f12bbf195e41f35d5c0fe5a 2021-07-01 16:32:30 +0200 Even Rouault $")
46
47 /************************************************************************/
48 /* ==================================================================== */
49 /* CPLKeywordParser */
50 /* ==================================================================== */
51 /************************************************************************/
52
53 /************************************************************************/
54 /* CPLKeywordParser() */
55 /************************************************************************/
56
// Compiler-generated default constructor.  Initial member values come from
// the class declaration, which is not part of this file.
CPLKeywordParser::CPLKeywordParser() = default;
58
59 /************************************************************************/
60 /* ~CPLKeywordParser() */
61 /************************************************************************/
62
~CPLKeywordParser()63 CPLKeywordParser::~CPLKeywordParser()
64
65 {
66 CSLDestroy( papszKeywordList );
67 papszKeywordList = nullptr;
68 }
69
70 /************************************************************************/
71 /* Ingest() */
72 /************************************************************************/
73
Ingest(VSILFILE * fp)74 int CPLKeywordParser::Ingest( VSILFILE *fp )
75
76 {
77 /* -------------------------------------------------------------------- */
78 /* Read in buffer till we find END all on its own line. */
79 /* -------------------------------------------------------------------- */
80 for( ; true; )
81 {
82 char szChunk[513] = {};
83 const size_t nBytesRead = VSIFReadL( szChunk, 1, 512, fp );
84
85 szChunk[nBytesRead] = '\0';
86 osHeaderText += szChunk;
87
88 if( nBytesRead < 512 )
89 break;
90
91 const char *pszCheck = nullptr;
92 if( osHeaderText.size() > 520 )
93 pszCheck = osHeaderText.c_str() + (osHeaderText.size() - 520);
94 else
95 pszCheck = szChunk;
96
97 if( strstr(pszCheck, "\r\nEND;\r\n") != nullptr
98 || strstr(pszCheck, "\nEND;\n") != nullptr )
99 break;
100 }
101
102 pszHeaderNext = osHeaderText.c_str();
103
104 /* -------------------------------------------------------------------- */
105 /* Process name/value pairs, keeping track of a "path stack". */
106 /* -------------------------------------------------------------------- */
107 return ReadGroup( "", 0 );
108 }
109
110 /************************************************************************/
111 /* ReadGroup() */
112 /************************************************************************/
113
ReadGroup(const char * pszPathPrefix,int nRecLevel)114 bool CPLKeywordParser::ReadGroup( const char *pszPathPrefix, int nRecLevel )
115
116 {
117 CPLString osName;
118 CPLString osValue;
119
120 // Arbitrary threshold to avoid stack overflow
121 if( nRecLevel == 100 )
122 return false;
123
124 for( ; true; )
125 {
126 if( !ReadPair( osName, osValue ) )
127 return false;
128
129 if( EQUAL(osName, "BEGIN_GROUP") || EQUAL(osName, "GROUP") )
130 {
131 if( !ReadGroup((CPLString(pszPathPrefix) + osValue + ".").c_str(),
132 nRecLevel + 1) )
133 return false;
134 }
135 else if( STARTS_WITH_CI(osName, "END") )
136 {
137 return true;
138 }
139 else
140 {
141 osName = pszPathPrefix + osName;
142 papszKeywordList = CSLSetNameValue( papszKeywordList,
143 osName, osValue );
144 }
145 }
146 }
147
148 /************************************************************************/
149 /* ReadPair() */
150 /* */
151 /* Read a name/value pair from the input stream. Strip off */
152 /* white space, ignore comments, split on '='. */
153 /************************************************************************/
154
ReadPair(CPLString & osName,CPLString & osValue)155 bool CPLKeywordParser::ReadPair( CPLString &osName, CPLString &osValue )
156
157 {
158 osName = "";
159 osValue = "";
160
161 if( !ReadWord( osName ) )
162 return false;
163
164 SkipWhite();
165
166 if( EQUAL(osName, "END") )
167 return TRUE;
168
169 if( *pszHeaderNext != '=' )
170 {
171 // ISIS3 does not have anything after the end group/object keyword.
172 return EQUAL(osName, "End_Group") || EQUAL(osName, "End_Object");
173 }
174
175 pszHeaderNext++;
176
177 SkipWhite();
178
179 osValue = "";
180
181 // Handle value lists like: Name = (Red, Red)
182 // or list of lists like: TLCList = ( (0, 0.000000), (8299, 4.811014) );
183 if( *pszHeaderNext == '(' )
184 {
185 CPLString osWord;
186 int nDepth = 0;
187 const char* pszLastPos = pszHeaderNext;
188
189 while( ReadWord( osWord ) && pszLastPos != pszHeaderNext)
190 {
191 SkipWhite();
192 pszLastPos = pszHeaderNext;
193
194 osValue += osWord;
195 const char* pszIter = osWord.c_str();
196 bool bInQuote = false;
197 while( *pszIter != '\0' )
198 {
199 if( *pszIter == '"' )
200 bInQuote = !bInQuote;
201 else if( !bInQuote )
202 {
203 if( *pszIter == '(' )
204 nDepth++;
205 else if( *pszIter == ')' )
206 {
207 nDepth--;
208 if( nDepth == 0 )
209 break;
210 }
211 }
212 pszIter++;
213 }
214 if( *pszIter == ')' && nDepth == 0 )
215 break;
216 }
217 }
218
219 else // Handle more normal "single word" values.
220 {
221 // Special case to handle non-conformant IMD files generated by
222 // previous GDAL version where we omit to surround values that have
223 // spaces with double quotes.
224 // So we use a heuristics to handle things like:
225 // key = value with spaces without single or double quotes at beginning of value;[\r]\n
226 const char* pszNextLF = strchr(pszHeaderNext, '\n');
227 if( pszNextLF )
228 {
229 std::string osTxt(pszHeaderNext, pszNextLF - pszHeaderNext);
230 const auto nCRPos = osTxt.find('\r');
231 const auto nSemiColonPos = osTxt.find(';');
232 const auto nQuotePos = osTxt.find('\'');
233 const auto nDoubleQuotePos = osTxt.find('"');
234 const auto nLTPos = osTxt.find('<');
235 if( nSemiColonPos != std::string::npos &&
236 (nCRPos == std::string::npos || (nCRPos + 1 == osTxt.size())) &&
237 ((nCRPos != std::string::npos && (nSemiColonPos + 1 == nCRPos)) ||
238 (nCRPos == std::string::npos && (nSemiColonPos + 1 == osTxt.size()))) &&
239 (nQuotePos == std::string::npos || nQuotePos != 0) &&
240 (nDoubleQuotePos == std::string::npos || nDoubleQuotePos != 0) &&
241 (nLTPos == std::string::npos || osTxt.find('>') == std::string::npos) )
242 {
243 pszHeaderNext = pszNextLF;
244 osTxt.resize(nSemiColonPos);
245 osValue = osTxt;
246 while( !osValue.empty() && osValue.back() == ' ' )
247 osValue.resize(osValue.size() - 1);
248 return true;
249 }
250 }
251
252 if( !ReadWord( osValue ) )
253 return false;
254 }
255
256 SkipWhite();
257
258 // No units keyword?
259 if( *pszHeaderNext != '<' )
260 return true;
261
262 // Append units keyword. For lines that like like this:
263 // MAP_RESOLUTION = 4.0 <PIXEL/DEGREE>
264
265 CPLString osWord;
266
267 osValue += " ";
268
269 while( ReadWord( osWord ) )
270 {
271 SkipWhite();
272
273 osValue += osWord;
274 if( osWord.back() == '>' )
275 break;
276 }
277
278 return true;
279 }
280
281 /************************************************************************/
282 /* ReadWord() */
283 /************************************************************************/
284
ReadWord(CPLString & osWord)285 bool CPLKeywordParser::ReadWord( CPLString &osWord )
286
287 {
288 osWord = "";
289
290 SkipWhite();
291
292 if( *pszHeaderNext == '\0' || *pszHeaderNext == '=' )
293 return false;
294
295 while( *pszHeaderNext != '\0'
296 && *pszHeaderNext != '='
297 && *pszHeaderNext != ';'
298 && !isspace(static_cast<unsigned char>(*pszHeaderNext)) )
299 {
300 if( *pszHeaderNext == '"' )
301 {
302 osWord += *(pszHeaderNext++);
303 while( *pszHeaderNext != '"' )
304 {
305 if( *pszHeaderNext == '\0' )
306 return false;
307
308 osWord += *(pszHeaderNext++);
309 }
310 osWord += *(pszHeaderNext++);
311 }
312 else if( *pszHeaderNext == '\'' )
313 {
314 osWord += *(pszHeaderNext++);
315 while( *pszHeaderNext != '\'' )
316 {
317 if( *pszHeaderNext == '\0' )
318 return false;
319
320 osWord += *(pszHeaderNext++);
321 }
322 osWord += *(pszHeaderNext++);
323 }
324 else
325 {
326 osWord += *pszHeaderNext;
327 pszHeaderNext++;
328 }
329 }
330
331 if( *pszHeaderNext == ';' )
332 pszHeaderNext++;
333
334 return true;
335 }
336
337 /************************************************************************/
338 /* SkipWhite() */
339 /************************************************************************/
340
SkipWhite()341 void CPLKeywordParser::SkipWhite()
342
343 {
344 for( ; true; )
345 {
346 // Skip white space (newline, space, tab, etc )
347 if( isspace( static_cast<unsigned char>(*pszHeaderNext) ) )
348 {
349 pszHeaderNext++;
350 continue;
351 }
352
353 // Skip C style comments
354 if( *pszHeaderNext == '/' && pszHeaderNext[1] == '*' )
355 {
356 pszHeaderNext += 2;
357
358 while( *pszHeaderNext != '\0'
359 && (*pszHeaderNext != '*'
360 || pszHeaderNext[1] != '/' ) )
361 {
362 pszHeaderNext++;
363 }
364 if( *pszHeaderNext == '\0' )
365 break;
366
367 pszHeaderNext += 2;
368 continue;
369 }
370
371 // Skip # style comments
372 if( *pszHeaderNext == '#' )
373 {
374 pszHeaderNext += 1;
375
376 // consume till end of line.
377 while( *pszHeaderNext != '\0'
378 && *pszHeaderNext != 10
379 && *pszHeaderNext != 13 )
380 {
381 pszHeaderNext++;
382 }
383 continue;
384 }
385
386 // not white space, return.
387 return;
388 }
389 }
390
391 /************************************************************************/
392 /* GetKeyword() */
393 /************************************************************************/
394
GetKeyword(const char * pszPath,const char * pszDefault)395 const char *CPLKeywordParser::GetKeyword( const char *pszPath,
396 const char *pszDefault )
397
398 {
399 const char *pszResult = CSLFetchNameValue( papszKeywordList, pszPath );
400 if( pszResult == nullptr )
401 return pszDefault;
402
403 return pszResult;
404 }
405
406 //! @endcond
407