1 /******************************************************************************
2  *
3  * Project:  PDS Driver; Planetary Data System Format
4  * Purpose:  Implementation of NASAKeywordHandler - a class to read
5  *           keyword data from PDS, ISIS2 and ISIS3 data products.
 * Author:   Frank Warmerdam <warmerdam@pobox.com>
7  *
8  ******************************************************************************
9  * Copyright (c) 2006, Frank Warmerdam <warmerdam@pobox.com>
10  * Copyright (c) 2008-2010, Even Rouault <even dot rouault at spatialys.com>
11  * Copyright (c) 2017 Hobu Inc
12  * Copyright (c) 2017, Dmitry Baryshnikov <polimax@mail.ru>
13  * Copyright (c) 2017, NextGIS <info@nextgis.com>
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a
16  * copy of this software and associated documentation files (the "Software"),
17  * to deal in the Software without restriction, including without limitation
18  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19  * and/or sell copies of the Software, and to permit persons to whom the
20  * Software is furnished to do so, subject to the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included
23  * in all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
26  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
28  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
31  * DEALINGS IN THE SOFTWARE.
32  ****************************************************************************
33  * Object Description Language (ODL) is used to encode data labels for PDS
34  * and other NASA data systems. Refer to Chapter 12 of "PDS Standards
35  * Reference" at http://pds.jpl.nasa.gov/tools/standards-reference.shtml for
36  * further details about ODL.
37  *
38  * This is also known as PVL (Parameter Value Language) which is written
39  * about at http://www.orrery.us/node/44 where it notes:
40  *
41  * The PVL syntax that the PDS uses is specified by the Consultative Committee
42  * for Space Data Systems in their Blue Book publication: "Parameter Value
43  * Language Specification (CCSD0006 and CCSD0008)", June 2000
44  * [CCSDS 641.0-B-2], and Green Book publication: "Parameter Value Language -
45  * A Tutorial", June 2000 [CCSDS 641.0-G-2]. PVL has also been accepted by the
46  * International Standards Organization (ISO), as a Final Draft International
47  * Standard (ISO 14961:2002) keyword value type language for naming and
48  * expressing data values.
49  * --
50  * also of interest, on PDS ODL:
51  *  http://pds.jpl.nasa.gov/documents/sr/Chapter12.pdf
52  *
53  ****************************************************************************/
54 
55 #include "nasakeywordhandler.h"
56 #include "ogrgeojsonreader.h"
57 #include <vector>
58 
59 CPL_CVSID("$Id: nasakeywordhandler.cpp 3708420fcd84068707ad0c2faef24d2398d02f32 2020-07-02 20:43:13 +0200 Even Rouault $")
60 
61 //! @cond Doxygen_Suppress
62 
63 /************************************************************************/
64 /* ==================================================================== */
65 /*                          NASAKeywordHandler                          */
66 /* ==================================================================== */
67 /************************************************************************/
68 
69 /************************************************************************/
70 /*                         NASAKeywordHandler()                         */
71 /************************************************************************/
72 
73 NASAKeywordHandler::NASAKeywordHandler() :
74     papszKeywordList(nullptr),
75     pszHeaderNext(nullptr),
76     m_bStripSurroundingQuotes(false)
77 {
78     oJSon.Deinit();
79 }
80 
81 /************************************************************************/
82 /*                        ~NASAKeywordHandler()                         */
83 /************************************************************************/
84 
85 NASAKeywordHandler::~NASAKeywordHandler()
86 
87 {
88     CSLDestroy( papszKeywordList );
89     papszKeywordList = nullptr;
90 }
91 
92 /************************************************************************/
93 /*                               Ingest()                               */
94 /************************************************************************/
95 
96 int NASAKeywordHandler::Ingest( VSILFILE *fp, int nOffset )
97 
98 {
99 /* -------------------------------------------------------------------- */
100 /*      Read in buffer till we find END all on its own line.            */
101 /* -------------------------------------------------------------------- */
102     if( VSIFSeekL( fp, nOffset, SEEK_SET ) != 0 )
103         return FALSE;
104 
105     for( ; true; )
106     {
107         char szChunk[513];
108 
109         int nBytesRead = static_cast<int>(VSIFReadL( szChunk, 1, 512, fp ));
110 
111         szChunk[nBytesRead] = '\0';
112         osHeaderText += szChunk;
113 
114         if( nBytesRead < 512 )
115             break;
116 
117         const char *pszCheck = nullptr;
118         if( osHeaderText.size() > 520 )
119             pszCheck = osHeaderText.c_str() + (osHeaderText.size() - 520);
120         else
121             pszCheck = szChunk;
122 
123         if( strstr(pszCheck,"\r\nEND\r\n") != nullptr
124             || strstr(pszCheck,"\nEND\n") != nullptr
125             || strstr(pszCheck,"\r\nEnd\r\n") != nullptr
126             || strstr(pszCheck,"\nEnd\n") != nullptr )
127             break;
128     }
129 
130     pszHeaderNext = osHeaderText.c_str();
131 
132 
133 /* -------------------------------------------------------------------- */
134 /*      Process name/value pairs, keeping track of a "path stack".      */
135 /* -------------------------------------------------------------------- */
136     oJSon = CPLJSONObject();
137     return ReadGroup( "", oJSon, 0 );
138 }
139 
140 /************************************************************************/
141 /*                             ReadGroup()                              */
142 /************************************************************************/
143 
144 int NASAKeywordHandler::ReadGroup( const char *pszPathPrefix, CPLJSONObject &oCur,
145                                    int nRecLevel )
146 
147 {
148     if( nRecLevel == 100 )
149         return FALSE;
150     for( ; true; )
151     {
152         CPLString osName, osValue;
153         if( !ReadPair( osName, osValue, oCur ) )
154             return FALSE;
155 
156         if( EQUAL(osName,"OBJECT") || EQUAL(osName,"GROUP") )
157         {
158             CPLJSONObject oNewGroup;
159             oNewGroup.Add( "_type", EQUAL(osName,"OBJECT") ? "object" : "group" );
160             if( !ReadGroup( (CPLString(pszPathPrefix) + osValue + ".").c_str(),
161                             oNewGroup, nRecLevel + 1 ) )
162             {
163                 return FALSE;
164             }
165             CPLJSONObject oName = oNewGroup["Name"];
166             if( (osValue == "Table" || osValue == "Field") &&
167                 (oName.GetType() == CPLJSONObject::Type::String) )
168             {
169                 oCur.Add( osValue + "_" + oName.ToString(), oNewGroup );
170                 oNewGroup.Add( "_container_name", osValue );
171             }
172             else if( oCur[osValue].IsValid() )
173             {
174                 int nIter = 2;
175                 while( oCur[osValue + CPLSPrintf("_%d", nIter)].IsValid() )
176                 {
177                     nIter ++;
178                 }
179                 oCur.Add( osValue + CPLSPrintf("_%d", nIter), oNewGroup );
180                 oNewGroup.Add( "_container_name", osValue );
181             }
182             else
183             {
184                 oCur.Add( osValue, oNewGroup );
185             }
186         }
187         else if( EQUAL(osName,"END")
188                  || EQUAL(osName,"END_GROUP" )
189                  || EQUAL(osName,"END_OBJECT" ) )
190         {
191             return TRUE;
192         }
193         else
194         {
195             osName = pszPathPrefix + osName;
196             papszKeywordList = CSLSetNameValue( papszKeywordList,
197                                                 osName, osValue );
198         }
199     }
200 }
201 
202 /************************************************************************/
203 /*                        StripQuotesIfNeeded()                         */
204 /************************************************************************/
205 
206 static CPLString StripQuotesIfNeeded(const CPLString& osWord,
207                                      bool bQuotesAlreadyRemoved)
208 {
209     if( bQuotesAlreadyRemoved || osWord.size() < 2 || osWord[0] != '"' )
210         return osWord;
211     return osWord.substr(1, osWord.size() - 2);
212 }
213 
214 /************************************************************************/
215 /*                              ReadPair()                              */
216 /*                                                                      */
217 /*      Read a name/value pair from the input stream.  Strip off        */
218 /*      white space, ignore comments, split on '='.                     */
219 /*      Returns TRUE on success.                                        */
220 /************************************************************************/
221 
222 int NASAKeywordHandler::ReadPair( CPLString &osName, CPLString &osValue,
223                                   CPLJSONObject &oCur )
224 
225 {
226     osName = "";
227     osValue = "";
228 
229     if( !ReadWord( osName ) )
230         return FALSE;
231 
232     SkipWhite();
233 
234     if( EQUAL(osName,"END") )
235         return TRUE;
236 
237     if( *pszHeaderNext != '=' )
238     {
239         // ISIS3 does not have anything after the end group/object keyword.
240         if( EQUAL(osName,"End_Group") || EQUAL(osName,"End_Object") )
241             return TRUE;
242 
243         return FALSE;
244     }
245 
246     pszHeaderNext++;
247 
248     SkipWhite();
249 
250     osValue = "";
251     bool bIsString = true;
252 
253     // Handle value lists like:
254     // Name   = (Red, Red) or  {Red, Red} or even ({Red, Red}, {Red, Red})
255     CPLJSONArray oArray;
256     if( *pszHeaderNext == '(' || *pszHeaderNext == '{' )
257     {
258         std::vector<char> oStackArrayBeginChar;
259         CPLString osWord;
260 
261         oStackArrayBeginChar.push_back(*pszHeaderNext);
262         osValue += *pszHeaderNext;
263         pszHeaderNext++;
264 
265         while( ReadWord( osWord, m_bStripSurroundingQuotes,
266                          true, &bIsString ) )
267         {
268             if( *pszHeaderNext == '(' ||  *pszHeaderNext == '{' )
269             {
270                 oStackArrayBeginChar.push_back(*pszHeaderNext);
271                 osValue += *pszHeaderNext;
272                 pszHeaderNext ++;
273             }
274 
275             // TODO: we could probably do better with nested json arrays
276             // instead of flattening when there are (( )) or ({ }) constructs
277             if( bIsString )
278             {
279                 if( !(osWord.empty() && (*pszHeaderNext == '(' ||
280                       *pszHeaderNext == '{' || *pszHeaderNext == ')' ||
281                       *pszHeaderNext == '}')) )
282                 {
283                     oArray.Add(StripQuotesIfNeeded(osWord, m_bStripSurroundingQuotes));
284                 }
285             }
286             else  if( CPLGetValueType(osWord) == CPL_VALUE_INTEGER )
287             {
288                 oArray.Add( atoi(osWord) );
289             }
290             else
291             {
292                 oArray.Add( CPLAtof(osWord) );
293             }
294 
295             osValue += osWord;
296             while ( isspace( static_cast<unsigned char>( *pszHeaderNext ) ) )
297             {
298                 pszHeaderNext++;
299             }
300 
301             if( *pszHeaderNext == ')' )
302             {
303                 osValue += *pszHeaderNext;
304                 if( oStackArrayBeginChar.empty() ||
305                     oStackArrayBeginChar.back() != '(' )
306                 {
307                     CPLDebug("PDS", "Unpaired ( ) for %s", osName.c_str());
308                     return FALSE;
309                 }
310                 oStackArrayBeginChar.pop_back();
311                 pszHeaderNext ++;
312                 if( oStackArrayBeginChar.empty() )
313                     break;
314             }
315             else if( *pszHeaderNext == '}' )
316             {
317                 osValue += *pszHeaderNext;
318                 if( oStackArrayBeginChar.empty() ||
319                     oStackArrayBeginChar.back() != '{' )
320                 {
321                     CPLDebug("PDS", "Unpaired { } for %s", osName.c_str());
322                     return FALSE;
323                 }
324                 oStackArrayBeginChar.pop_back();
325                 pszHeaderNext ++;
326                 if( oStackArrayBeginChar.empty() )
327                     break;
328             }
329             else if( *pszHeaderNext == ',' )
330             {
331                 osValue += *pszHeaderNext;
332                 pszHeaderNext ++;
333                 // Do not use SkipWhite() here to avoid being confuse by
334                 // constructs like
335                 // FOO = (#123456,
336                 //        #123456)
337                 // where we could confuse the second line with a comment.
338                 while ( isspace( static_cast<unsigned char>( *pszHeaderNext ) ) )
339                 {
340                     pszHeaderNext++;
341                 }
342             }
343             SkipWhite();
344 
345         }
346     }
347 
348     else // Handle more normal "single word" values.
349     {
350         if( !ReadWord( osValue, m_bStripSurroundingQuotes, false, &bIsString ) )
351             return FALSE;
352     }
353 
354     SkipWhite();
355 
356     // No units keyword?
357     if( *pszHeaderNext != '<' )
358     {
359         if( !EQUAL(osName, "OBJECT") && !EQUAL(osName, "GROUP") )
360         {
361             if( oArray.Size() > 0 )
362             {
363                 oCur.Add(osName, oArray);
364             }
365             else
366             {
367                 if( bIsString )
368                 {
369                     oCur.Add( osName, StripQuotesIfNeeded(osValue, m_bStripSurroundingQuotes) );
370                 }
371                 else if( CPLGetValueType(osValue) == CPL_VALUE_INTEGER )
372                 {
373                     oCur.Add( osName, atoi(osValue) );
374                 }
375                 else
376                 {
377                     oCur.Add( osName, CPLAtof(osValue) );
378                 }
379             }
380         }
381         return TRUE;
382     }
383 
384     CPLString osValueNoUnit(osValue);
385     // Append units keyword.  For lines that like like this:
386     //  MAP_RESOLUTION               = 4.0 <PIXEL/DEGREE>
387 
388     osValue += " ";
389 
390     CPLString osWord;
391     CPLString osUnit;
392     while( ReadWord( osWord ) )
393     {
394         SkipWhite();
395 
396         osValue += osWord;
397         osUnit = osWord;
398         if( osWord.back() == '>' )
399             break;
400     }
401 
402     if( osUnit[0] == '<' )
403         osUnit = osUnit.substr(1);
404     if( !osUnit.empty() && osUnit.back() == '>' )
405         osUnit = osUnit.substr(0, osUnit.size() - 1);
406 
407     CPLJSONObject newObject;
408     oCur.Add( osName, newObject );
409 
410     if( oArray.Size() > 0 )
411     {
412         newObject.Add( "value", oArray );
413     }
414     else
415     {
416         if( bIsString )
417         {
418             newObject.Add( "value", osValueNoUnit );
419         }
420         else if( CPLGetValueType(osValueNoUnit) == CPL_VALUE_INTEGER )
421         {
422             newObject.Add( "value", atoi(osValueNoUnit) );
423         }
424         else
425         {
426             newObject.Add( "value", CPLAtof(osValueNoUnit) );
427         }
428     }
429     newObject.Add( "unit", osUnit );
430 
431     return TRUE;
432 }
433 
434 /************************************************************************/
435 /*                              ReadWord()                              */
436 /*  Returns TRUE on success                                             */
437 /************************************************************************/
438 
439 int NASAKeywordHandler::ReadWord( CPLString &osWord,
440                                   bool bStripSurroundingQuotes,
441                                   bool bParseList,
442                                   bool* pbIsString )
443 
444 {
445     if( pbIsString )
446         *pbIsString = false;
447     osWord = "";
448 
449     SkipWhite();
450 
451     if( !(*pszHeaderNext != '\0'
452           && *pszHeaderNext != '='
453           && !isspace( static_cast<unsigned char>( *pszHeaderNext ) ) ) )
454         return FALSE;
455 
456     /* Extract a text string delimited by '\"' */
457     /* Convert newlines (CR or LF) within quotes. While text strings
458        support them as per ODL, the keyword list doesn't want them */
459     if( *pszHeaderNext == '"' )
460     {
461         if( pbIsString )
462             *pbIsString = true;
463         if( !bStripSurroundingQuotes )
464             osWord += *(pszHeaderNext);
465         pszHeaderNext ++;
466         while( *pszHeaderNext != '"' )
467         {
468             if( *pszHeaderNext == '\0' )
469                 return FALSE;
470             if( *pszHeaderNext == '\n' )
471             {
472                 osWord += "\\n";
473                 pszHeaderNext++;
474                 continue;
475             }
476             if( *pszHeaderNext == '\r' )
477             {
478                 osWord += "\\r";
479                 pszHeaderNext++;
480                 continue;
481             }
482             osWord += *(pszHeaderNext++);
483         }
484         if( !bStripSurroundingQuotes )
485             osWord += *(pszHeaderNext);
486         pszHeaderNext ++;
487 
488         return TRUE;
489     }
490 
491     /* Extract a symbol string */
492     /* These are expected to not have
493        '\'' (delimiters),
494        format effectors (should fit on a single line) or
495        control characters.
496     */
497     if( *pszHeaderNext == '\'' )
498     {
499         if( pbIsString )
500             *pbIsString = true;
501         if( !bStripSurroundingQuotes )
502             osWord += *(pszHeaderNext);
503         pszHeaderNext ++;
504         while( *pszHeaderNext != '\'' )
505         {
506             if( *pszHeaderNext == '\0' )
507                 return FALSE;
508 
509             osWord += *(pszHeaderNext++);
510         }
511         if( !bStripSurroundingQuotes )
512             osWord += *(pszHeaderNext);
513         pszHeaderNext ++;
514         return TRUE;
515     }
516 
517     /*
518      * Extract normal text.  Terminated by '=' or whitespace.
519      *
520      * A special exception is that a line may terminate with a '-'
521      * which is taken as a line extender, and we suck up white space to new
522      * text.
523      */
524     while( *pszHeaderNext != '\0'
525            && *pszHeaderNext != '='
526            && ((bParseList && *pszHeaderNext != ',' && *pszHeaderNext != '(' &&
527                 *pszHeaderNext != ')'&& *pszHeaderNext != '{' &&
528                 *pszHeaderNext != '}' ) ||
529                (!bParseList && !isspace(static_cast<unsigned char>( *pszHeaderNext ) ))) )
530     {
531         osWord += *pszHeaderNext;
532         pszHeaderNext++;
533 
534         if( *pszHeaderNext == '-'
535             && (pszHeaderNext[1] == 10 || pszHeaderNext[1] == 13) )
536         {
537             pszHeaderNext += 2;
538             SkipWhite();
539         }
540     }
541 
542     if( pbIsString )
543         *pbIsString = CPLGetValueType(osWord) == CPL_VALUE_STRING;
544 
545     return TRUE;
546 }
547 
548 /************************************************************************/
549 /*                             SkipWhite()                              */
550 /*  Skip white spaces and C style comments                              */
551 /************************************************************************/
552 
553 void NASAKeywordHandler::SkipWhite()
554 
555 {
556     for( ; true; )
557     {
558         // Skip C style comments
559         if( *pszHeaderNext == '/' && pszHeaderNext[1] == '*' )
560         {
561             pszHeaderNext += 2;
562 
563             while( *pszHeaderNext != '\0'
564                    && (*pszHeaderNext != '*'
565                        || pszHeaderNext[1] != '/' ) )
566             {
567                 pszHeaderNext++;
568             }
569             if( *pszHeaderNext == '\0' )
570                 return;
571 
572             pszHeaderNext += 2;
573 
574             // consume till end of line.
575             // reduce sensibility to a label error
576             while( *pszHeaderNext != '\0'
577                    && *pszHeaderNext != 10
578                    && *pszHeaderNext != 13 )
579             {
580                 pszHeaderNext++;
581             }
582             continue;
583         }
584 
585         // Skip # style comments
586         if( (*pszHeaderNext == 10 || *pszHeaderNext == 13 ||
587              *pszHeaderNext == ' ' || *pszHeaderNext == '\t' )
588               && pszHeaderNext[1] == '#' )
589         {
590             pszHeaderNext += 2;
591 
592             // consume till end of line.
593             while( *pszHeaderNext != '\0'
594                    && *pszHeaderNext != 10
595                    && *pszHeaderNext != 13 )
596             {
597                 pszHeaderNext++;
598             }
599             continue;
600         }
601 
602         // Skip white space (newline, space, tab, etc )
603         if( isspace( static_cast<unsigned char>( *pszHeaderNext ) ) )
604         {
605             pszHeaderNext++;
606             continue;
607         }
608 
609         // not white space, return.
610         return;
611     }
612 }
613 
614 /************************************************************************/
615 /*                             GetKeyword()                             */
616 /************************************************************************/
617 
618 const char *NASAKeywordHandler::GetKeyword( const char *pszPath,
619                                             const char *pszDefault )
620 
621 {
622     const char *pszResult = CSLFetchNameValue( papszKeywordList, pszPath );
623 
624     if( pszResult == nullptr )
625         return pszDefault;
626 
627     return pszResult;
628 }
629 
630 /************************************************************************/
631 /*                             GetKeywordList()                         */
632 /************************************************************************/
633 
634 char **NASAKeywordHandler::GetKeywordList()
635 {
636     return papszKeywordList;
637 }
638 
639 /************************************************************************/
/*                           GetJsonObject()                            */
641 /************************************************************************/
642 
643 CPLJSONObject NASAKeywordHandler::GetJsonObject() const
644 {
645     return oJSon;
646 }
647 
648 //! @endcond
649