1 /****************************************************************************** 2 * $Id: gdal_pdf.h 5afc2a1dfe5c2735beb1ff8f1ffb3fc125a93bf4 2020-01-12 23:57:04 +0100 Even Rouault $ 3 * 4 * Project: PDF Translator 5 * Purpose: Definition of classes for OGR .pdf driver. 6 * Author: Even Rouault, even dot rouault at spatialys.com 7 * 8 ****************************************************************************** 9 * 10 * Support for open-source PDFium library 11 * 12 * Copyright (C) 2015 Klokan Technologies GmbH (http://www.klokantech.com/) 13 * Author: Martin Mikita <martin.mikita@klokantech.com>, xmikit00 @ FIT VUT Brno 14 * 15 ****************************************************************************** 16 * Copyright (c) 2010-2014, Even Rouault <even dot rouault at spatialys.com> 17 * 18 * Permission is hereby granted, free of charge, to any person obtaining a 19 * copy of this software and associated documentation files (the "Software"), 20 * to deal in the Software without restriction, including without limitation 21 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 22 * and/or sell copies of the Software, and to permit persons to whom the 23 * Software is furnished to do so, subject to the following conditions: 24 * 25 * The above copyright notice and this permission notice shall be included 26 * in all copies or substantial portions of the Software. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 29 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 31 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 34 * DEALINGS IN THE SOFTWARE. 35 ****************************************************************************/ 36 37 #ifndef GDAL_PDF_H_INCLUDED 38 #define GDAL_PDF_H_INCLUDED 39 40 /* hack for PDF driver and poppler >= 0.15.0 that defines incompatible "typedef bool GBool" */ 41 /* in include/poppler/goo/gtypes.h with the one defined in cpl_port.h */ 42 #define CPL_GBOOL_DEFINED 43 #define OGR_FEATURESTYLE_INCLUDE 44 #include "cpl_port.h" 45 46 #include <map> 47 #include <set> 48 #include <stack> 49 #include <utility> 50 #include <bitset> // For detecting usage of PDF library 51 #include <algorithm> 52 53 #include "pdfsdk_headers.h" 54 55 #include "gdal_pam.h" 56 #include "ogrsf_frmts.h" 57 58 #include "ogr_mem.h" 59 #include "pdfobject.h" 60 61 #define PDFLIB_POPPLER 0 62 #define PDFLIB_PODOFO 1 63 #define PDFLIB_PDFIUM 2 64 #define PDFLIB_COUNT 3 65 66 #if defined(HAVE_POPPLER) || defined(HAVE_PODOFO) || defined(HAVE_PDFIUM) 67 #define HAVE_PDF_READ_SUPPORT 68 #endif 69 70 /************************************************************************/ 71 /* OGRPDFLayer */ 72 /************************************************************************/ 73 74 #ifdef HAVE_PDF_READ_SUPPORT 75 76 class PDFDataset; 77 78 class OGRPDFLayer final: public OGRMemLayer 79 { 80 PDFDataset *poDS; 81 int bGeomTypeSet; 82 int bGeomTypeMixed; 83 84 public: 85 OGRPDFLayer( PDFDataset* poDS, 86 const char * pszName, 87 OGRSpatialReference *poSRS, 88 OGRwkbGeometryType eGeomType ); 89 90 void Fill( GDALPDFArray* poArray ); 91 92 virtual int TestCapability( const char * ) override; 93 }; 94 95 #endif 96 97 /************************************************************************/ 98 /* OGRPDFWritableLayer */ 99 /************************************************************************/ 100 101 class PDFWritableVectorDataset; 102 103 class OGRPDFWritableLayer final: public OGRMemLayer 104 { 105 PDFWritableVectorDataset *poDS; 106 107 public: 108 OGRPDFWritableLayer(PDFWritableVectorDataset* poDS, 109 const char * pszName, 110 OGRSpatialReference *poSRS, 111 OGRwkbGeometryType eGeomType); 112 113 virtual int TestCapability( const char * ) override; 114 virtual OGRErr ICreateFeature( OGRFeature *poFeature ) override; 115 }; 116 117 /************************************************************************/ 118 /* GDALPDFTileDesc */ 119 /************************************************************************/ 120 121 typedef struct 122 { 123 GDALPDFObject* poImage; 124 double adfCM[6]; 125 double dfWidth; 126 double dfHeight; 127 int nBands; 128 } GDALPDFTileDesc; 129 130 #ifdef HAVE_PDFIUM 131 /** 132 * Structures for Document and Document's Page for PDFium library, 133 * which does not support multi-threading. 134 * Structures keeps objects for PDFium library and exclusive mutex locks 135 * for one-per-time access of PDFium library methods with multi-threading GDAL 136 * Structures also keeps only one object per each opened PDF document 137 * - this saves time for opening and memory for opened objects 138 * Document is closed after closing all pages object. 139 */ 140 141 /************************************************************************/ 142 /* TPdfiumPageStruct */ 143 /************************************************************************/ 144 145 // Map of Pdfium pages in following structure 146 typedef struct { 147 int pageNum; 148 CPDF_Page* page; 149 CPLMutex * readMutex; 150 int sharedNum; 151 } TPdfiumPageStruct; 152 153 typedef std::map<int, TPdfiumPageStruct*> TMapPdfiumPages; 154 155 /************************************************************************/ 156 /* TPdfiumDocumentStruct */ 157 /************************************************************************/ 158 159 // Structure for Mutex on File 160 typedef struct { 161 char* filename; 162 CPDF_Document* doc; 163 TMapPdfiumPages pages; 164 FPDF_FILEACCESS* psFileAccess; 165 } TPdfiumDocumentStruct; 166 167 #endif // ~ HAVE_PDFIUM 168 169 /************************************************************************/ 170 /* ==================================================================== */ 171 /* PDFDataset */ 172 /* ==================================================================== */ 173 /************************************************************************/ 174 175 class PDFRasterBand; 176 class PDFImageRasterBand; 177 178 #ifdef HAVE_POPPLER 179 class ObjectAutoFree; 180 #endif 181 182 #define MAX_TOKEN_SIZE 256 183 #define TOKEN_STACK_SIZE 8 184 185 #ifdef HAVE_PDF_READ_SUPPORT 186 187 class PDFDataset final: public GDALPamDataset 188 { 189 friend class PDFRasterBand; 190 friend class PDFImageRasterBand; 191 192 VSILFILE *m_fp = nullptr; 193 PDFDataset* poParentDS; 194 195 CPLString osFilename; 196 CPLString osUserPwd; 197 char *pszWKT; 198 double dfDPI; 199 int bHasCTM; 200 double adfCTM[6]; 201 double adfGeoTransform[6]; 202 int bGeoTransformValid; 203 int nGCPCount; 204 GDAL_GCP *pasGCPList; 205 int bProjDirty; 206 int bNeatLineDirty; 207 208 GDALMultiDomainMetadata oMDMD; 209 int bInfoDirty; 210 int bXMPDirty; 211 212 std::bitset<PDFLIB_COUNT> bUseLib; 213 #ifdef HAVE_POPPLER 214 PDFDoc* poDocPoppler; 215 #endif 216 #ifdef HAVE_PODOFO 217 PoDoFo::PdfMemDocument* poDocPodofo; 218 int bPdfToPpmFailed; 219 #endif 220 #ifdef HAVE_PDFIUM 221 TPdfiumDocumentStruct* poDocPdfium; 222 TPdfiumPageStruct* poPagePdfium; 223 std::vector<PDFDataset*> apoOvrDS, apoOvrDSBackup; 224 #endif 225 GDALPDFObject* poPageObj; 226 227 int iPage; 228 229 GDALPDFObject *poImageObj; 230 231 double dfMaxArea; 232 int ParseLGIDictObject(GDALPDFObject* poLGIDict); 233 int ParseLGIDictDictFirstPass(GDALPDFDictionary* poLGIDict, int* pbIsBestCandidate = nullptr); 234 int ParseLGIDictDictSecondPass(GDALPDFDictionary* poLGIDict); 235 int ParseProjDict(GDALPDFDictionary* poProjDict); 236 int ParseVP(GDALPDFObject* poVP, double dfMediaBoxWidth, double dfMediaBoxHeight); 237 int ParseMeasure(GDALPDFObject* poMeasure, 238 double dfMediaBoxWidth, double dfMediaBoxHeight, 239 double dfULX, double dfULY, double dfLRX, double dfLRY); 240 241 int bTried; 242 GByte *pabyCachedData; 243 int nLastBlockXOff; 244 int nLastBlockYOff; 245 246 OGRPolygon* poNeatLine; 247 248 std::vector<GDALPDFTileDesc> asTiles; /* in the order of the PDF file */ 249 std::vector<int> aiTiles; /* in the order of blocks */ 250 int nBlockXSize; 251 int nBlockYSize; 252 int CheckTiledRaster(); 253 254 void GuessDPI(GDALPDFDictionary* poPageDict, int* pnBands); 255 void FindXMP(GDALPDFObject* poObj); 256 void ParseInfo(GDALPDFObject* poObj); 257 258 #ifdef HAVE_POPPLER 259 ObjectAutoFree* poCatalogObjectPoppler; 260 #endif 261 GDALPDFObject* poCatalogObject; 262 GDALPDFObject* GetCatalog(); 263 264 #if defined(HAVE_POPPLER) || defined(HAVE_PDFIUM) 265 void AddLayer(const char* pszLayerName); 266 #endif 267 268 #if defined(HAVE_POPPLER) 269 void ExploreLayersPoppler(GDALPDFArray* poArray, CPLString osTopLayer, int nRecLevel, int& nVisited, bool& bStop); 270 void FindLayersPoppler(); 271 void TurnLayersOnOffPoppler(); 272 std::vector<std::pair<CPLString, OptionalContentGroup*> > oLayerOCGListPoppler; 273 #endif 274 275 #ifdef HAVE_PDFIUM 276 void ExploreLayersPdfium(GDALPDFArray* poArray, int nRecLevel, CPLString osTopLayer = ""); 277 void FindLayersPdfium(); 278 void PDFiumRenderPageBitmap(FPDF_BITMAP bitmap, FPDF_PAGE page, int start_x, int start_y, 279 int size_x, int size_y, const char* pszRenderingOptions); 280 void TurnLayersOnOffPdfium(); 281 282 public: 283 typedef enum 284 { 285 VISIBILITY_DEFAULT, 286 VISIBILITY_ON, 287 VISIBILITY_OFF 288 } VisibilityState; 289 290 VisibilityState GetVisibilityStateForOGCPdfium(int nNum, int nGen); 291 292 private: 293 std::map< CPLString, std::pair<int,int> > oMapLayerNameToOCGNumGenPdfium; 294 std::map< std::pair<int,int>, VisibilityState > oMapOCGNumGenToVisibilityStatePdfium; 295 #endif 296 297 CPLStringList osLayerList; 298 299 struct LayerWithRef 300 { 301 CPLString osName{}; 302 GDALPDFObjectNum nOCGNum{}; 303 int nOCGGen = 0; 304 LayerWithRefLayerWithRef305 LayerWithRef(const CPLString& osNameIn, 306 const GDALPDFObjectNum& nOCGNumIn, 307 int nOCGGenIn) : 308 osName(osNameIn), nOCGNum(nOCGNumIn), nOCGGen(nOCGGenIn) {} 309 }; 310 std::vector<LayerWithRef> aoLayerWithRef; 311 312 CPLString FindLayerOCG(GDALPDFDictionary* poPageDict, 313 const char* pszLayerName); 314 void FindLayersGeneric(GDALPDFDictionary* poPageDict); 315 316 int bUseOCG; 317 318 char **papszOpenOptions; 319 static const char* GetOption(char** papszOpenOptions, 320 const char* pszOptionName, 321 const char* pszDefaultVal); 322 323 int bHasLoadedLayers; 324 int nLayers; 325 OGRLayer **papoLayers; 326 327 double dfPageWidth; 328 double dfPageHeight; 329 void PDFCoordsToSRSCoords(double x, double y, 330 double& X, double &Y); 331 332 std::map<int,OGRGeometry*> oMapMCID; 333 void CleanupIntermediateResources(); 334 335 std::map<CPLString, int> oMapOperators; 336 void InitMapOperators(); 337 338 int bSetStyle; 339 340 void ExploreTree(GDALPDFObject* poObj, 341 std::set< std::pair<int,int> > aoSetAlreadyVisited, 342 int nRecLevel); 343 void ExploreContents(GDALPDFObject* poObj, GDALPDFObject* poResources, int nDepth, int& nVisited, bool& bStop); 344 345 void ExploreContentsNonStructuredInternal(GDALPDFObject* poContents, 346 GDALPDFObject* poResources, 347 std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer, 348 OGRPDFLayer* poSingleLayer); 349 void ExploreContentsNonStructured(GDALPDFObject* poObj, GDALPDFObject* poResources); 350 351 int UnstackTokens(const char* pszToken, 352 int nRequiredArgs, 353 char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE], 354 int& nTokenStackSize, 355 double* adfCoords); 356 OGRGeometry* ParseContent(const char* pszContent, 357 GDALPDFObject* poResources, 358 int bInitBDCStack, 359 int bMatchQ, 360 std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer, 361 OGRPDFLayer* poCurLayer); 362 OGRGeometry* BuildGeometry(std::vector<double>& oCoords, 363 int bHasFoundFill, 364 int bHasMultiPart); 365 366 int OpenVectorLayers(GDALPDFDictionary* poPageDict); 367 368 #ifdef HAVE_PDFIUM 369 void InitOverviews(); 370 #endif // ~ HAVE_PDFIUM 371 372 public: 373 PDFDataset(PDFDataset* poParentDS = nullptr, int nXSize = 0, int nYSize = 0); 374 virtual ~PDFDataset(); 375 376 virtual const char* _GetProjectionRef() override; 377 virtual CPLErr GetGeoTransform( double * ) override; 378 379 virtual CPLErr _SetProjection(const char* pszWKTIn) override; 380 virtual CPLErr SetGeoTransform(double* padfGeoTransform) override; 381 GetSpatialRef()382 const OGRSpatialReference* GetSpatialRef() const override { 383 return GetSpatialRefFromOldGetProjectionRef(); 384 } SetSpatialRef(const OGRSpatialReference * poSRS)385 CPLErr SetSpatialRef(const OGRSpatialReference* poSRS) override { 386 return OldSetProjectionFromSetSpatialRef(poSRS); 387 } 388 389 virtual char **GetMetadataDomainList() override; 390 virtual char **GetMetadata( const char * pszDomain = "" ) override; 391 virtual CPLErr SetMetadata( char ** papszMetadata, 392 const char * pszDomain = "" ) override; 393 virtual const char *GetMetadataItem( const char * pszName, 394 const char * pszDomain = "" ) override; 395 virtual CPLErr SetMetadataItem( const char * pszName, 396 const char * pszValue, 397 const char * pszDomain = "" ) override; 398 399 virtual CPLErr IRasterIO( GDALRWFlag, int, int, int, int, 400 void *, int, int, GDALDataType, 401 int, int *, 402 GSpacing nPixelSpace, GSpacing nLineSpace, 403 GSpacing nBandSpace, 404 GDALRasterIOExtraArg* psExtraArg) override; 405 406 virtual int GetGCPCount() override; 407 virtual const char *_GetGCPProjection() override; GetGCPSpatialRef()408 const OGRSpatialReference* GetGCPSpatialRef() const override { 409 return GetGCPSpatialRefFromOldGetGCPProjection(); 410 } 411 virtual const GDAL_GCP *GetGCPs() override; 412 virtual CPLErr _SetGCPs( int nGCPCount, const GDAL_GCP *pasGCPList, 413 const char *pszGCPProjection ) override; 414 using GDALPamDataset::SetGCPs; SetGCPs(int nGCPCountIn,const GDAL_GCP * pasGCPListIn,const OGRSpatialReference * poSRS)415 CPLErr SetGCPs( int nGCPCountIn, const GDAL_GCP *pasGCPListIn, 416 const OGRSpatialReference* poSRS ) override { 417 return OldSetGCPsFromNew(nGCPCountIn, pasGCPListIn, poSRS); 418 } 419 420 CPLErr ReadPixels( int nReqXOff, int nReqYOff, 421 int nReqXSize, int nReqYSize, 422 GSpacing nPixelSpace, 423 GSpacing nLineSpace, 424 GSpacing nBandSpace, 425 GByte* pabyData ); 426 427 virtual int GetLayerCount() override; 428 virtual OGRLayer* GetLayer( int ) override; 429 430 virtual int TestCapability( const char * ) override; 431 432 OGRGeometry *GetGeometryFromMCID(int nMCID); 433 GetPageObj()434 GDALPDFObject* GetPageObj() { return poPageObj; } GetPageWidth()435 double GetPageWidth() const { return dfPageWidth; } GetPageHeight()436 double GetPageHeight() const { return dfPageHeight; } 437 438 static PDFDataset *Open( GDALOpenInfo * ); OpenWrapper(GDALOpenInfo * poOpenInfo)439 static GDALDataset *OpenWrapper( GDALOpenInfo * poOpenInfo ) { return Open(poOpenInfo); } 440 static int Identify( GDALOpenInfo * ); 441 442 #ifdef HAVE_PDFIUM 443 virtual CPLErr IBuildOverviews( const char *, int, int *, 444 int, int *, GDALProgressFunc, void * ) override; 445 446 static int bPdfiumInit; 447 #endif 448 }; 449 450 /************************************************************************/ 451 /* ==================================================================== */ 452 /* PDFRasterBand */ 453 /* ==================================================================== */ 454 /************************************************************************/ 455 456 class PDFRasterBand CPL_NON_FINAL: public GDALPamRasterBand 457 { 458 friend class PDFDataset; 459 460 int nResolutionLevel; 461 462 CPLErr IReadBlockFromTile( int, int, void * ); 463 464 public: 465 466 PDFRasterBand( PDFDataset *, int, int ); 467 virtual ~PDFRasterBand(); 468 469 #ifdef HAVE_PDFIUM 470 virtual int GetOverviewCount() override; 471 virtual GDALRasterBand *GetOverview( int ) override; 472 #endif // ~ HAVE_PDFIUM 473 474 virtual CPLErr IReadBlock( int, int, void * ) override; 475 virtual GDALColorInterp GetColorInterpretation() override; 476 477 #ifdef notdef 478 virtual CPLErr IRasterIO( GDALRWFlag, int, int, int, int, 479 void *, int, int, GDALDataType, 480 GSpacing nPixelSpace, GSpacing nLineSpace, 481 GDALRasterIOExtraArg* psExtraArg) override; 482 #endif 483 }; 484 485 #endif /* HAVE_PDF_READ_SUPPORT */ 486 487 /************************************************************************/ 488 /* PDFWritableDataset */ 489 /************************************************************************/ 490 491 class PDFWritableVectorDataset final: public GDALDataset 492 { 493 char** papszOptions; 494 495 int nLayers; 496 OGRLayer **papoLayers; 497 498 int bModified; 499 500 public: 501 PDFWritableVectorDataset(); 502 virtual ~PDFWritableVectorDataset(); 503 504 virtual OGRLayer* ICreateLayer( const char * pszLayerName, 505 OGRSpatialReference *poSRS, 506 OGRwkbGeometryType eType, 507 char ** papszOptions ) override; 508 509 virtual OGRErr SyncToDisk(); 510 511 virtual int GetLayerCount() override; 512 virtual OGRLayer* GetLayer( int ) override; 513 514 virtual int TestCapability( const char * ) override; 515 516 static GDALDataset* Create( const char * pszName, 517 int nXSize, int nYSize, int nBands, 518 GDALDataType eType, char ** papszOptions ); 519 SetModified()520 void SetModified() { bModified = TRUE; } 521 }; 522 523 GDALDataset* GDALPDFOpen(const char* pszFilename, GDALAccess eAccess); 524 CPLString PDFSanitizeLayerName(const char* pszName); 525 526 #endif /* ndef GDAL_PDF_H_INCLUDED */ 527