1 /****************************************************************************** 2 * 3 * Project: PDS Driver; Planetary Data System Format 4 * Purpose: Implementation of NASAKeywordHandler - a class to read 5 * keyword data from PDS, ISIS2 and ISIS3 data products. 6 * Author: Frank Warmerdam <warmerdam@pobox.com 7 * 8 ****************************************************************************** 9 * Copyright (c) 2006, Frank Warmerdam <warmerdam@pobox.com> 10 * Copyright (c) 2008-2010, Even Rouault <even dot rouault at spatialys.com> 11 * Copyright (c) 2017 Hobu Inc 12 * Copyright (c) 2017, Dmitry Baryshnikov <polimax@mail.ru> 13 * Copyright (c) 2017, NextGIS <info@nextgis.com> 14 * 15 * Permission is hereby granted, free of charge, to any person obtaining a 16 * copy of this software and associated documentation files (the "Software"), 17 * to deal in the Software without restriction, including without limitation 18 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 19 * and/or sell copies of the Software, and to permit persons to whom the 20 * Software is furnished to do so, subject to the following conditions: 21 * 22 * The above copyright notice and this permission notice shall be included 23 * in all copies or substantial portions of the Software. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 26 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 28 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 31 * DEALINGS IN THE SOFTWARE. 32 **************************************************************************** 33 * Object Description Language (ODL) is used to encode data labels for PDS 34 * and other NASA data systems. Refer to Chapter 12 of "PDS Standards 35 * Reference" at http://pds.jpl.nasa.gov/tools/standards-reference.shtml for 36 * further details about ODL. 37 * 38 * This is also known as PVL (Parameter Value Language) which is written 39 * about at http://www.orrery.us/node/44 where it notes: 40 * 41 * The PVL syntax that the PDS uses is specified by the Consultative Committee 42 * for Space Data Systems in their Blue Book publication: "Parameter Value 43 * Language Specification (CCSD0006 and CCSD0008)", June 2000 44 * [CCSDS 641.0-B-2], and Green Book publication: "Parameter Value Language - 45 * A Tutorial", June 2000 [CCSDS 641.0-G-2]. PVL has also been accepted by the 46 * International Standards Organization (ISO), as a Final Draft International 47 * Standard (ISO 14961:2002) keyword value type language for naming and 48 * expressing data values. 49 * -- 50 * also of interest, on PDS ODL: 51 * http://pds.jpl.nasa.gov/documents/sr/Chapter12.pdf 52 * 53 ****************************************************************************/ 54 55 #include "nasakeywordhandler.h" 56 #include "ogrgeojsonreader.h" 57 #include <vector> 58 59 CPL_CVSID("$Id: nasakeywordhandler.cpp 3708420fcd84068707ad0c2faef24d2398d02f32 2020-07-02 20:43:13 +0200 Even Rouault $") 60 61 //! @cond Doxygen_Suppress 62 63 /************************************************************************/ 64 /* ==================================================================== */ 65 /* NASAKeywordHandler */ 66 /* ==================================================================== */ 67 /************************************************************************/ 68 69 /************************************************************************/ 70 /* NASAKeywordHandler() */ 71 /************************************************************************/ 72 73 NASAKeywordHandler::NASAKeywordHandler() : 74 papszKeywordList(nullptr), 75 pszHeaderNext(nullptr), 76 m_bStripSurroundingQuotes(false) 77 { 78 oJSon.Deinit(); 79 } 80 81 /************************************************************************/ 82 /* ~NASAKeywordHandler() */ 83 /************************************************************************/ 84 85 NASAKeywordHandler::~NASAKeywordHandler() 86 87 { 88 CSLDestroy( papszKeywordList ); 89 papszKeywordList = nullptr; 90 } 91 92 /************************************************************************/ 93 /* Ingest() */ 94 /************************************************************************/ 95 96 int NASAKeywordHandler::Ingest( VSILFILE *fp, int nOffset ) 97 98 { 99 /* -------------------------------------------------------------------- */ 100 /* Read in buffer till we find END all on its own line. */ 101 /* -------------------------------------------------------------------- */ 102 if( VSIFSeekL( fp, nOffset, SEEK_SET ) != 0 ) 103 return FALSE; 104 105 for( ; true; ) 106 { 107 char szChunk[513]; 108 109 int nBytesRead = static_cast<int>(VSIFReadL( szChunk, 1, 512, fp )); 110 111 szChunk[nBytesRead] = '\0'; 112 osHeaderText += szChunk; 113 114 if( nBytesRead < 512 ) 115 break; 116 117 const char *pszCheck = nullptr; 118 if( osHeaderText.size() > 520 ) 119 pszCheck = osHeaderText.c_str() + (osHeaderText.size() - 520); 120 else 121 pszCheck = szChunk; 122 123 if( strstr(pszCheck,"\r\nEND\r\n") != nullptr 124 || strstr(pszCheck,"\nEND\n") != nullptr 125 || strstr(pszCheck,"\r\nEnd\r\n") != nullptr 126 || strstr(pszCheck,"\nEnd\n") != nullptr ) 127 break; 128 } 129 130 pszHeaderNext = osHeaderText.c_str(); 131 132 133 /* -------------------------------------------------------------------- */ 134 /* Process name/value pairs, keeping track of a "path stack". */ 135 /* -------------------------------------------------------------------- */ 136 oJSon = CPLJSONObject(); 137 return ReadGroup( "", oJSon, 0 ); 138 } 139 140 /************************************************************************/ 141 /* ReadGroup() */ 142 /************************************************************************/ 143 144 int NASAKeywordHandler::ReadGroup( const char *pszPathPrefix, CPLJSONObject &oCur, 145 int nRecLevel ) 146 147 { 148 if( nRecLevel == 100 ) 149 return FALSE; 150 for( ; true; ) 151 { 152 CPLString osName, osValue; 153 if( !ReadPair( osName, osValue, oCur ) ) 154 return FALSE; 155 156 if( EQUAL(osName,"OBJECT") || EQUAL(osName,"GROUP") ) 157 { 158 CPLJSONObject oNewGroup; 159 oNewGroup.Add( "_type", EQUAL(osName,"OBJECT") ? "object" : "group" ); 160 if( !ReadGroup( (CPLString(pszPathPrefix) + osValue + ".").c_str(), 161 oNewGroup, nRecLevel + 1 ) ) 162 { 163 return FALSE; 164 } 165 CPLJSONObject oName = oNewGroup["Name"]; 166 if( (osValue == "Table" || osValue == "Field") && 167 (oName.GetType() == CPLJSONObject::Type::String) ) 168 { 169 oCur.Add( osValue + "_" + oName.ToString(), oNewGroup ); 170 oNewGroup.Add( "_container_name", osValue ); 171 } 172 else if( oCur[osValue].IsValid() ) 173 { 174 int nIter = 2; 175 while( oCur[osValue + CPLSPrintf("_%d", nIter)].IsValid() ) 176 { 177 nIter ++; 178 } 179 oCur.Add( osValue + CPLSPrintf("_%d", nIter), oNewGroup ); 180 oNewGroup.Add( "_container_name", osValue ); 181 } 182 else 183 { 184 oCur.Add( osValue, oNewGroup ); 185 } 186 } 187 else if( EQUAL(osName,"END") 188 || EQUAL(osName,"END_GROUP" ) 189 || EQUAL(osName,"END_OBJECT" ) ) 190 { 191 return TRUE; 192 } 193 else 194 { 195 osName = pszPathPrefix + osName; 196 papszKeywordList = CSLSetNameValue( papszKeywordList, 197 osName, osValue ); 198 } 199 } 200 } 201 202 /************************************************************************/ 203 /* StripQuotesIfNeeded() */ 204 /************************************************************************/ 205 206 static CPLString StripQuotesIfNeeded(const CPLString& osWord, 207 bool bQuotesAlreadyRemoved) 208 { 209 if( bQuotesAlreadyRemoved || osWord.size() < 2 || osWord[0] != '"' ) 210 return osWord; 211 return osWord.substr(1, osWord.size() - 2); 212 } 213 214 /************************************************************************/ 215 /* ReadPair() */ 216 /* */ 217 /* Read a name/value pair from the input stream. Strip off */ 218 /* white space, ignore comments, split on '='. */ 219 /* Returns TRUE on success. */ 220 /************************************************************************/ 221 222 int NASAKeywordHandler::ReadPair( CPLString &osName, CPLString &osValue, 223 CPLJSONObject &oCur ) 224 225 { 226 osName = ""; 227 osValue = ""; 228 229 if( !ReadWord( osName ) ) 230 return FALSE; 231 232 SkipWhite(); 233 234 if( EQUAL(osName,"END") ) 235 return TRUE; 236 237 if( *pszHeaderNext != '=' ) 238 { 239 // ISIS3 does not have anything after the end group/object keyword. 240 if( EQUAL(osName,"End_Group") || EQUAL(osName,"End_Object") ) 241 return TRUE; 242 243 return FALSE; 244 } 245 246 pszHeaderNext++; 247 248 SkipWhite(); 249 250 osValue = ""; 251 bool bIsString = true; 252 253 // Handle value lists like: 254 // Name = (Red, Red) or {Red, Red} or even ({Red, Red}, {Red, Red}) 255 CPLJSONArray oArray; 256 if( *pszHeaderNext == '(' || *pszHeaderNext == '{' ) 257 { 258 std::vector<char> oStackArrayBeginChar; 259 CPLString osWord; 260 261 oStackArrayBeginChar.push_back(*pszHeaderNext); 262 osValue += *pszHeaderNext; 263 pszHeaderNext++; 264 265 while( ReadWord( osWord, m_bStripSurroundingQuotes, 266 true, &bIsString ) ) 267 { 268 if( *pszHeaderNext == '(' || *pszHeaderNext == '{' ) 269 { 270 oStackArrayBeginChar.push_back(*pszHeaderNext); 271 osValue += *pszHeaderNext; 272 pszHeaderNext ++; 273 } 274 275 // TODO: we could probably do better with nested json arrays 276 // instead of flattening when there are (( )) or ({ }) constructs 277 if( bIsString ) 278 { 279 if( !(osWord.empty() && (*pszHeaderNext == '(' || 280 *pszHeaderNext == '{' || *pszHeaderNext == ')' || 281 *pszHeaderNext == '}')) ) 282 { 283 oArray.Add(StripQuotesIfNeeded(osWord, m_bStripSurroundingQuotes)); 284 } 285 } 286 else if( CPLGetValueType(osWord) == CPL_VALUE_INTEGER ) 287 { 288 oArray.Add( atoi(osWord) ); 289 } 290 else 291 { 292 oArray.Add( CPLAtof(osWord) ); 293 } 294 295 osValue += osWord; 296 while ( isspace( static_cast<unsigned char>( *pszHeaderNext ) ) ) 297 { 298 pszHeaderNext++; 299 } 300 301 if( *pszHeaderNext == ')' ) 302 { 303 osValue += *pszHeaderNext; 304 if( oStackArrayBeginChar.empty() || 305 oStackArrayBeginChar.back() != '(' ) 306 { 307 CPLDebug("PDS", "Unpaired ( ) for %s", osName.c_str()); 308 return FALSE; 309 } 310 oStackArrayBeginChar.pop_back(); 311 pszHeaderNext ++; 312 if( oStackArrayBeginChar.empty() ) 313 break; 314 } 315 else if( *pszHeaderNext == '}' ) 316 { 317 osValue += *pszHeaderNext; 318 if( oStackArrayBeginChar.empty() || 319 oStackArrayBeginChar.back() != '{' ) 320 { 321 CPLDebug("PDS", "Unpaired { } for %s", osName.c_str()); 322 return FALSE; 323 } 324 oStackArrayBeginChar.pop_back(); 325 pszHeaderNext ++; 326 if( oStackArrayBeginChar.empty() ) 327 break; 328 } 329 else if( *pszHeaderNext == ',' ) 330 { 331 osValue += *pszHeaderNext; 332 pszHeaderNext ++; 333 // Do not use SkipWhite() here to avoid being confuse by 334 // constructs like 335 // FOO = (#123456, 336 // #123456) 337 // where we could confuse the second line with a comment. 338 while ( isspace( static_cast<unsigned char>( *pszHeaderNext ) ) ) 339 { 340 pszHeaderNext++; 341 } 342 } 343 SkipWhite(); 344 345 } 346 } 347 348 else // Handle more normal "single word" values. 349 { 350 if( !ReadWord( osValue, m_bStripSurroundingQuotes, false, &bIsString ) ) 351 return FALSE; 352 } 353 354 SkipWhite(); 355 356 // No units keyword? 357 if( *pszHeaderNext != '<' ) 358 { 359 if( !EQUAL(osName, "OBJECT") && !EQUAL(osName, "GROUP") ) 360 { 361 if( oArray.Size() > 0 ) 362 { 363 oCur.Add(osName, oArray); 364 } 365 else 366 { 367 if( bIsString ) 368 { 369 oCur.Add( osName, StripQuotesIfNeeded(osValue, m_bStripSurroundingQuotes) ); 370 } 371 else if( CPLGetValueType(osValue) == CPL_VALUE_INTEGER ) 372 { 373 oCur.Add( osName, atoi(osValue) ); 374 } 375 else 376 { 377 oCur.Add( osName, CPLAtof(osValue) ); 378 } 379 } 380 } 381 return TRUE; 382 } 383 384 CPLString osValueNoUnit(osValue); 385 // Append units keyword. For lines that like like this: 386 // MAP_RESOLUTION = 4.0 <PIXEL/DEGREE> 387 388 osValue += " "; 389 390 CPLString osWord; 391 CPLString osUnit; 392 while( ReadWord( osWord ) ) 393 { 394 SkipWhite(); 395 396 osValue += osWord; 397 osUnit = osWord; 398 if( osWord.back() == '>' ) 399 break; 400 } 401 402 if( osUnit[0] == '<' ) 403 osUnit = osUnit.substr(1); 404 if( !osUnit.empty() && osUnit.back() == '>' ) 405 osUnit = osUnit.substr(0, osUnit.size() - 1); 406 407 CPLJSONObject newObject; 408 oCur.Add( osName, newObject ); 409 410 if( oArray.Size() > 0 ) 411 { 412 newObject.Add( "value", oArray ); 413 } 414 else 415 { 416 if( bIsString ) 417 { 418 newObject.Add( "value", osValueNoUnit ); 419 } 420 else if( CPLGetValueType(osValueNoUnit) == CPL_VALUE_INTEGER ) 421 { 422 newObject.Add( "value", atoi(osValueNoUnit) ); 423 } 424 else 425 { 426 newObject.Add( "value", CPLAtof(osValueNoUnit) ); 427 } 428 } 429 newObject.Add( "unit", osUnit ); 430 431 return TRUE; 432 } 433 434 /************************************************************************/ 435 /* ReadWord() */ 436 /* Returns TRUE on success */ 437 /************************************************************************/ 438 439 int NASAKeywordHandler::ReadWord( CPLString &osWord, 440 bool bStripSurroundingQuotes, 441 bool bParseList, 442 bool* pbIsString ) 443 444 { 445 if( pbIsString ) 446 *pbIsString = false; 447 osWord = ""; 448 449 SkipWhite(); 450 451 if( !(*pszHeaderNext != '\0' 452 && *pszHeaderNext != '=' 453 && !isspace( static_cast<unsigned char>( *pszHeaderNext ) ) ) ) 454 return FALSE; 455 456 /* Extract a text string delimited by '\"' */ 457 /* Convert newlines (CR or LF) within quotes. While text strings 458 support them as per ODL, the keyword list doesn't want them */ 459 if( *pszHeaderNext == '"' ) 460 { 461 if( pbIsString ) 462 *pbIsString = true; 463 if( !bStripSurroundingQuotes ) 464 osWord += *(pszHeaderNext); 465 pszHeaderNext ++; 466 while( *pszHeaderNext != '"' ) 467 { 468 if( *pszHeaderNext == '\0' ) 469 return FALSE; 470 if( *pszHeaderNext == '\n' ) 471 { 472 osWord += "\\n"; 473 pszHeaderNext++; 474 continue; 475 } 476 if( *pszHeaderNext == '\r' ) 477 { 478 osWord += "\\r"; 479 pszHeaderNext++; 480 continue; 481 } 482 osWord += *(pszHeaderNext++); 483 } 484 if( !bStripSurroundingQuotes ) 485 osWord += *(pszHeaderNext); 486 pszHeaderNext ++; 487 488 return TRUE; 489 } 490 491 /* Extract a symbol string */ 492 /* These are expected to not have 493 '\'' (delimiters), 494 format effectors (should fit on a single line) or 495 control characters. 496 */ 497 if( *pszHeaderNext == '\'' ) 498 { 499 if( pbIsString ) 500 *pbIsString = true; 501 if( !bStripSurroundingQuotes ) 502 osWord += *(pszHeaderNext); 503 pszHeaderNext ++; 504 while( *pszHeaderNext != '\'' ) 505 { 506 if( *pszHeaderNext == '\0' ) 507 return FALSE; 508 509 osWord += *(pszHeaderNext++); 510 } 511 if( !bStripSurroundingQuotes ) 512 osWord += *(pszHeaderNext); 513 pszHeaderNext ++; 514 return TRUE; 515 } 516 517 /* 518 * Extract normal text. Terminated by '=' or whitespace. 519 * 520 * A special exception is that a line may terminate with a '-' 521 * which is taken as a line extender, and we suck up white space to new 522 * text. 523 */ 524 while( *pszHeaderNext != '\0' 525 && *pszHeaderNext != '=' 526 && ((bParseList && *pszHeaderNext != ',' && *pszHeaderNext != '(' && 527 *pszHeaderNext != ')'&& *pszHeaderNext != '{' && 528 *pszHeaderNext != '}' ) || 529 (!bParseList && !isspace(static_cast<unsigned char>( *pszHeaderNext ) ))) ) 530 { 531 osWord += *pszHeaderNext; 532 pszHeaderNext++; 533 534 if( *pszHeaderNext == '-' 535 && (pszHeaderNext[1] == 10 || pszHeaderNext[1] == 13) ) 536 { 537 pszHeaderNext += 2; 538 SkipWhite(); 539 } 540 } 541 542 if( pbIsString ) 543 *pbIsString = CPLGetValueType(osWord) == CPL_VALUE_STRING; 544 545 return TRUE; 546 } 547 548 /************************************************************************/ 549 /* SkipWhite() */ 550 /* Skip white spaces and C style comments */ 551 /************************************************************************/ 552 553 void NASAKeywordHandler::SkipWhite() 554 555 { 556 for( ; true; ) 557 { 558 // Skip C style comments 559 if( *pszHeaderNext == '/' && pszHeaderNext[1] == '*' ) 560 { 561 pszHeaderNext += 2; 562 563 while( *pszHeaderNext != '\0' 564 && (*pszHeaderNext != '*' 565 || pszHeaderNext[1] != '/' ) ) 566 { 567 pszHeaderNext++; 568 } 569 if( *pszHeaderNext == '\0' ) 570 return; 571 572 pszHeaderNext += 2; 573 574 // consume till end of line. 575 // reduce sensibility to a label error 576 while( *pszHeaderNext != '\0' 577 && *pszHeaderNext != 10 578 && *pszHeaderNext != 13 ) 579 { 580 pszHeaderNext++; 581 } 582 continue; 583 } 584 585 // Skip # style comments 586 if( (*pszHeaderNext == 10 || *pszHeaderNext == 13 || 587 *pszHeaderNext == ' ' || *pszHeaderNext == '\t' ) 588 && pszHeaderNext[1] == '#' ) 589 { 590 pszHeaderNext += 2; 591 592 // consume till end of line. 593 while( *pszHeaderNext != '\0' 594 && *pszHeaderNext != 10 595 && *pszHeaderNext != 13 ) 596 { 597 pszHeaderNext++; 598 } 599 continue; 600 } 601 602 // Skip white space (newline, space, tab, etc ) 603 if( isspace( static_cast<unsigned char>( *pszHeaderNext ) ) ) 604 { 605 pszHeaderNext++; 606 continue; 607 } 608 609 // not white space, return. 610 return; 611 } 612 } 613 614 /************************************************************************/ 615 /* GetKeyword() */ 616 /************************************************************************/ 617 618 const char *NASAKeywordHandler::GetKeyword( const char *pszPath, 619 const char *pszDefault ) 620 621 { 622 const char *pszResult = CSLFetchNameValue( papszKeywordList, pszPath ); 623 624 if( pszResult == nullptr ) 625 return pszDefault; 626 627 return pszResult; 628 } 629 630 /************************************************************************/ 631 /* GetKeywordList() */ 632 /************************************************************************/ 633 634 char **NASAKeywordHandler::GetKeywordList() 635 { 636 return papszKeywordList; 637 } 638 639 /************************************************************************/ 640 /* StealJSon() */ 641 /************************************************************************/ 642 643 CPLJSONObject NASAKeywordHandler::GetJsonObject() const 644 { 645 return oJSon; 646 } 647 648 //! @endcond 649