1 /******************************************************************************
2  * $Id: gdal_pdf.h 5afc2a1dfe5c2735beb1ff8f1ffb3fc125a93bf4 2020-01-12 23:57:04 +0100 Even Rouault $
3  *
4  * Project:  PDF Translator
5  * Purpose:  Definition of classes for OGR .pdf driver.
6  * Author:   Even Rouault, even dot rouault at spatialys.com
7  *
8  ******************************************************************************
9  *
10  * Support for open-source PDFium library
11  *
12  * Copyright (C) 2015 Klokan Technologies GmbH (http://www.klokantech.com/)
13  * Author: Martin Mikita <martin.mikita@klokantech.com>, xmikit00 @ FIT VUT Brno
14  *
15  ******************************************************************************
16  * Copyright (c) 2010-2014, Even Rouault <even dot rouault at spatialys.com>
17  *
18  * Permission is hereby granted, free of charge, to any person obtaining a
19  * copy of this software and associated documentation files (the "Software"),
20  * to deal in the Software without restriction, including without limitation
21  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
22  * and/or sell copies of the Software, and to permit persons to whom the
23  * Software is furnished to do so, subject to the following conditions:
24  *
25  * The above copyright notice and this permission notice shall be included
26  * in all copies or substantial portions of the Software.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
29  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
31  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
34  * DEALINGS IN THE SOFTWARE.
35  ****************************************************************************/
36 
37 #ifndef GDAL_PDF_H_INCLUDED
38 #define GDAL_PDF_H_INCLUDED
39 
40 /* hack for PDF driver and poppler >= 0.15.0 that defines incompatible "typedef bool GBool" */
41 /* in include/poppler/goo/gtypes.h with the one defined in cpl_port.h */
42 #define CPL_GBOOL_DEFINED
43 #define OGR_FEATURESTYLE_INCLUDE
44 #include "cpl_port.h"
45 
46 #include <map>
47 #include <set>
48 #include <stack>
49 #include <utility>
50 #include <bitset>   // For detecting usage of PDF library
51 #include <algorithm>
52 
53 #include "pdfsdk_headers.h"
54 
55 #include "gdal_pam.h"
56 #include "ogrsf_frmts.h"
57 
58 #include "ogr_mem.h"
59 #include "pdfobject.h"
60 
/* Identifiers of the PDF backends. PDFLIB_COUNT is the number of backends */
/* and is used as the size of the PDFDataset::bUseLib bitset. */
#define PDFLIB_POPPLER 0
#define PDFLIB_PODOFO  1
#define PDFLIB_PDFIUM  2
#define PDFLIB_COUNT   3

/* HAVE_PDF_READ_SUPPORT is defined as soon as one backend is available. */
#if defined(HAVE_POPPLER) || defined(HAVE_PODOFO) || defined(HAVE_PDFIUM)
#  define HAVE_PDF_READ_SUPPORT
#endif
69 
70 /************************************************************************/
71 /*                             OGRPDFLayer                              */
72 /************************************************************************/
73 
74 #ifdef HAVE_PDF_READ_SUPPORT
75 
76 class PDFDataset;
77 
78 class OGRPDFLayer final: public OGRMemLayer
79 {
80     PDFDataset       *poDS;
81     int               bGeomTypeSet;
82     int               bGeomTypeMixed;
83 
84 public:
85         OGRPDFLayer( PDFDataset* poDS,
86                      const char * pszName,
87                      OGRSpatialReference *poSRS,
88                      OGRwkbGeometryType eGeomType );
89 
90     void                Fill( GDALPDFArray* poArray );
91 
92     virtual int                 TestCapability( const char * ) override;
93 };
94 
95 #endif
96 
97 /************************************************************************/
98 /*                          OGRPDFWritableLayer                         */
99 /************************************************************************/
100 
101 class PDFWritableVectorDataset;
102 
103 class OGRPDFWritableLayer final: public OGRMemLayer
104 {
105     PDFWritableVectorDataset       *poDS;
106 
107 public:
108         OGRPDFWritableLayer(PDFWritableVectorDataset* poDS,
109                     const char * pszName,
110                     OGRSpatialReference *poSRS,
111                     OGRwkbGeometryType eGeomType);
112 
113     virtual int                 TestCapability( const char * ) override;
114     virtual OGRErr              ICreateFeature( OGRFeature *poFeature ) override;
115 };
116 
117 /************************************************************************/
118 /*                            GDALPDFTileDesc                           */
119 /************************************************************************/
120 
121 typedef struct
122 {
123     GDALPDFObject* poImage;
124     double         adfCM[6];
125     double         dfWidth;
126     double         dfHeight;
127     int            nBands;
128 } GDALPDFTileDesc;
129 
130 #ifdef HAVE_PDFIUM
/**
 * Structures describing a document and a document page for the PDFium
 *  library, which does not support multi-threading.
 * The structures hold the PDFium objects together with exclusive mutex locks,
 *  so that PDFium methods are accessed by one thread at a time when GDAL is
 *  used from multiple threads.
 * The structures also keep only one object per opened PDF document
 *  - this saves opening time and the memory used by opened objects.
 * A document is closed once all of its page objects have been closed.
 */
140 
141 /************************************************************************/
142 /*                           TPdfiumPageStruct                          */
143 /************************************************************************/
144 
145 // Map of Pdfium pages in following structure
146 typedef struct {
147   int pageNum;
148   CPDF_Page* page;
149   CPLMutex * readMutex;
150   int sharedNum;
151 } TPdfiumPageStruct;
152 
153 typedef std::map<int, TPdfiumPageStruct*>        TMapPdfiumPages;
154 
155 /************************************************************************/
156 /*                         TPdfiumDocumentStruct                        */
157 /************************************************************************/
158 
159 // Structure for Mutex on File
160 typedef struct {
161   char* filename;
162   CPDF_Document* doc;
163   TMapPdfiumPages pages;
164   FPDF_FILEACCESS* psFileAccess;
165 } TPdfiumDocumentStruct;
166 
167 #endif  // ~ HAVE_PDFIUM
168 
169 /************************************************************************/
170 /* ==================================================================== */
171 /*                              PDFDataset                              */
172 /* ==================================================================== */
173 /************************************************************************/
174 
/* Forward declarations of the raster band classes befriended by PDFDataset. */
class PDFRasterBand;
class PDFImageRasterBand;

#ifdef HAVE_POPPLER
class ObjectAutoFree;
#endif

/* Dimensions of the token stack taken by PDFDataset::UnstackTokens(): */
/* TOKEN_STACK_SIZE entries of MAX_TOKEN_SIZE characters each. */
#define MAX_TOKEN_SIZE   256
#define TOKEN_STACK_SIZE 8
184 
185 #ifdef HAVE_PDF_READ_SUPPORT
186 
187 class PDFDataset final: public GDALPamDataset
188 {
189     friend class PDFRasterBand;
190     friend class PDFImageRasterBand;
191 
192     VSILFILE    *m_fp = nullptr;
193     PDFDataset*  poParentDS;
194 
195     CPLString    osFilename;
196     CPLString    osUserPwd;
197     char        *pszWKT;
198     double       dfDPI;
199     int          bHasCTM;
200     double       adfCTM[6];
201     double       adfGeoTransform[6];
202     int          bGeoTransformValid;
203     int          nGCPCount;
204     GDAL_GCP    *pasGCPList;
205     int          bProjDirty;
206     int          bNeatLineDirty;
207 
208     GDALMultiDomainMetadata oMDMD;
209     int          bInfoDirty;
210     int          bXMPDirty;
211 
212     std::bitset<PDFLIB_COUNT> bUseLib;
213 #ifdef HAVE_POPPLER
214     PDFDoc*      poDocPoppler;
215 #endif
216 #ifdef HAVE_PODOFO
217     PoDoFo::PdfMemDocument* poDocPodofo;
218     int          bPdfToPpmFailed;
219 #endif
220 #ifdef HAVE_PDFIUM
221     TPdfiumDocumentStruct*  poDocPdfium;
222     TPdfiumPageStruct*      poPagePdfium;
223     std::vector<PDFDataset*> apoOvrDS, apoOvrDSBackup;
224 #endif
225     GDALPDFObject* poPageObj;
226 
227     int          iPage;
228 
229     GDALPDFObject *poImageObj;
230 
231     double       dfMaxArea;
232     int          ParseLGIDictObject(GDALPDFObject* poLGIDict);
233     int          ParseLGIDictDictFirstPass(GDALPDFDictionary* poLGIDict, int* pbIsBestCandidate = nullptr);
234     int          ParseLGIDictDictSecondPass(GDALPDFDictionary* poLGIDict);
235     int          ParseProjDict(GDALPDFDictionary* poProjDict);
236     int          ParseVP(GDALPDFObject* poVP, double dfMediaBoxWidth, double dfMediaBoxHeight);
237     int          ParseMeasure(GDALPDFObject* poMeasure,
238                               double dfMediaBoxWidth, double dfMediaBoxHeight,
239                               double dfULX, double dfULY, double dfLRX, double dfLRY);
240 
241     int          bTried;
242     GByte       *pabyCachedData;
243     int          nLastBlockXOff;
244     int          nLastBlockYOff;
245 
246     OGRPolygon*  poNeatLine;
247 
248     std::vector<GDALPDFTileDesc> asTiles; /* in the order of the PDF file */
249     std::vector<int> aiTiles; /* in the order of blocks */
250     int          nBlockXSize;
251     int          nBlockYSize;
252     int          CheckTiledRaster();
253 
254     void         GuessDPI(GDALPDFDictionary* poPageDict, int* pnBands);
255     void         FindXMP(GDALPDFObject* poObj);
256     void         ParseInfo(GDALPDFObject* poObj);
257 
258 #ifdef HAVE_POPPLER
259     ObjectAutoFree* poCatalogObjectPoppler;
260 #endif
261     GDALPDFObject* poCatalogObject;
262     GDALPDFObject* GetCatalog();
263 
264 #if defined(HAVE_POPPLER) || defined(HAVE_PDFIUM)
265     void         AddLayer(const char* pszLayerName);
266 #endif
267 
268 #if defined(HAVE_POPPLER)
269     void         ExploreLayersPoppler(GDALPDFArray* poArray, CPLString osTopLayer, int nRecLevel, int& nVisited, bool& bStop);
270     void         FindLayersPoppler();
271     void         TurnLayersOnOffPoppler();
272     std::vector<std::pair<CPLString, OptionalContentGroup*> > oLayerOCGListPoppler;
273 #endif
274 
275 #ifdef HAVE_PDFIUM
276     void         ExploreLayersPdfium(GDALPDFArray* poArray, int nRecLevel, CPLString osTopLayer = "");
277     void         FindLayersPdfium();
278     void         PDFiumRenderPageBitmap(FPDF_BITMAP bitmap, FPDF_PAGE page, int start_x, int start_y,
279                                         int size_x, int size_y, const char* pszRenderingOptions);
280     void         TurnLayersOnOffPdfium();
281 
282 public:
283     typedef enum
284     {
285         VISIBILITY_DEFAULT,
286         VISIBILITY_ON,
287         VISIBILITY_OFF
288     } VisibilityState;
289 
290     VisibilityState GetVisibilityStateForOGCPdfium(int nNum, int nGen);
291 
292 private:
293     std::map< CPLString, std::pair<int,int> > oMapLayerNameToOCGNumGenPdfium;
294     std::map< std::pair<int,int>, VisibilityState > oMapOCGNumGenToVisibilityStatePdfium;
295 #endif
296 
297     CPLStringList osLayerList;
298 
299     struct LayerWithRef
300     {
301         CPLString        osName{};
302         GDALPDFObjectNum nOCGNum{};
303         int              nOCGGen = 0;
304 
LayerWithRefLayerWithRef305         LayerWithRef(const CPLString& osNameIn,
306                      const GDALPDFObjectNum& nOCGNumIn,
307                      int nOCGGenIn) :
308             osName(osNameIn), nOCGNum(nOCGNumIn), nOCGGen(nOCGGenIn) {}
309     };
310     std::vector<LayerWithRef> aoLayerWithRef;
311 
312     CPLString     FindLayerOCG(GDALPDFDictionary* poPageDict,
313                                const char* pszLayerName);
314     void          FindLayersGeneric(GDALPDFDictionary* poPageDict);
315 
316     int          bUseOCG;
317 
318     char       **papszOpenOptions;
319     static const char*  GetOption(char** papszOpenOptions,
320                                   const char* pszOptionName,
321                                   const char* pszDefaultVal);
322 
323     int                 bHasLoadedLayers;
324     int                 nLayers;
325     OGRLayer          **papoLayers;
326 
327     double              dfPageWidth;
328     double              dfPageHeight;
329     void                PDFCoordsToSRSCoords(double x, double y,
330                                              double& X, double &Y);
331 
332     std::map<int,OGRGeometry*> oMapMCID;
333     void                CleanupIntermediateResources();
334 
335     std::map<CPLString, int> oMapOperators;
336     void                InitMapOperators();
337 
338     int                 bSetStyle;
339 
340     void                ExploreTree(GDALPDFObject* poObj,
341                                     std::set< std::pair<int,int> > aoSetAlreadyVisited,
342                                     int nRecLevel);
343     void                ExploreContents(GDALPDFObject* poObj, GDALPDFObject* poResources, int nDepth, int& nVisited, bool& bStop);
344 
345     void                ExploreContentsNonStructuredInternal(GDALPDFObject* poContents,
346                                                              GDALPDFObject* poResources,
347                                                              std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer,
348                                                              OGRPDFLayer* poSingleLayer);
349     void                ExploreContentsNonStructured(GDALPDFObject* poObj, GDALPDFObject* poResources);
350 
351     int                 UnstackTokens(const char* pszToken,
352                                       int nRequiredArgs,
353                                       char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE],
354                                       int& nTokenStackSize,
355                                       double* adfCoords);
356     OGRGeometry*        ParseContent(const char* pszContent,
357                                      GDALPDFObject* poResources,
358                                      int bInitBDCStack,
359                                      int bMatchQ,
360                                      std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer,
361                                      OGRPDFLayer* poCurLayer);
362     OGRGeometry*        BuildGeometry(std::vector<double>& oCoords,
363                                       int bHasFoundFill,
364                                       int bHasMultiPart);
365 
366     int                 OpenVectorLayers(GDALPDFDictionary* poPageDict);
367 
368 #ifdef HAVE_PDFIUM
369     void    InitOverviews();
370 #endif  // ~ HAVE_PDFIUM
371 
372   public:
373                  PDFDataset(PDFDataset* poParentDS = nullptr, int nXSize = 0, int nYSize = 0);
374     virtual     ~PDFDataset();
375 
376     virtual const char* _GetProjectionRef() override;
377     virtual CPLErr GetGeoTransform( double * ) override;
378 
379     virtual CPLErr      _SetProjection(const char* pszWKTIn) override;
380     virtual CPLErr      SetGeoTransform(double* padfGeoTransform) override;
381 
GetSpatialRef()382     const OGRSpatialReference* GetSpatialRef() const override {
383         return GetSpatialRefFromOldGetProjectionRef();
384     }
SetSpatialRef(const OGRSpatialReference * poSRS)385     CPLErr SetSpatialRef(const OGRSpatialReference* poSRS) override {
386         return OldSetProjectionFromSetSpatialRef(poSRS);
387     }
388 
389     virtual char      **GetMetadataDomainList() override;
390     virtual char      **GetMetadata( const char * pszDomain = "" ) override;
391     virtual CPLErr      SetMetadata( char ** papszMetadata,
392                                      const char * pszDomain = "" ) override;
393     virtual const char *GetMetadataItem( const char * pszName,
394                                          const char * pszDomain = "" ) override;
395     virtual CPLErr      SetMetadataItem( const char * pszName,
396                                          const char * pszValue,
397                                          const char * pszDomain = "" ) override;
398 
399     virtual CPLErr IRasterIO( GDALRWFlag, int, int, int, int,
400                               void *, int, int, GDALDataType,
401                               int, int *,
402                               GSpacing nPixelSpace, GSpacing nLineSpace,
403                               GSpacing nBandSpace,
404                               GDALRasterIOExtraArg* psExtraArg) override;
405 
406     virtual int    GetGCPCount() override;
407     virtual const char *_GetGCPProjection() override;
GetGCPSpatialRef()408     const OGRSpatialReference* GetGCPSpatialRef() const override {
409         return GetGCPSpatialRefFromOldGetGCPProjection();
410     }
411     virtual const GDAL_GCP *GetGCPs() override;
412     virtual CPLErr _SetGCPs( int nGCPCount, const GDAL_GCP *pasGCPList,
413                             const char *pszGCPProjection ) override;
414     using GDALPamDataset::SetGCPs;
SetGCPs(int nGCPCountIn,const GDAL_GCP * pasGCPListIn,const OGRSpatialReference * poSRS)415     CPLErr SetGCPs( int nGCPCountIn, const GDAL_GCP *pasGCPListIn,
416                     const OGRSpatialReference* poSRS ) override {
417         return OldSetGCPsFromNew(nGCPCountIn, pasGCPListIn, poSRS);
418     }
419 
420     CPLErr ReadPixels( int nReqXOff, int nReqYOff,
421                        int nReqXSize, int nReqYSize,
422                        GSpacing nPixelSpace,
423                        GSpacing nLineSpace,
424                        GSpacing nBandSpace,
425                        GByte* pabyData );
426 
427     virtual int                 GetLayerCount() override;
428     virtual OGRLayer*           GetLayer( int ) override;
429 
430     virtual int                 TestCapability( const char * ) override;
431 
432     OGRGeometry        *GetGeometryFromMCID(int nMCID);
433 
GetPageObj()434     GDALPDFObject*      GetPageObj() { return poPageObj; }
GetPageWidth()435     double              GetPageWidth() const { return dfPageWidth; }
GetPageHeight()436     double              GetPageHeight() const { return dfPageHeight; }
437 
438     static PDFDataset  *Open( GDALOpenInfo * );
OpenWrapper(GDALOpenInfo * poOpenInfo)439     static GDALDataset *OpenWrapper( GDALOpenInfo * poOpenInfo ) { return Open(poOpenInfo); }
440     static int          Identify( GDALOpenInfo * );
441 
442 #ifdef HAVE_PDFIUM
443     virtual CPLErr IBuildOverviews( const char *, int, int *,
444                                     int, int *, GDALProgressFunc, void * ) override;
445 
446     static int bPdfiumInit;
447 #endif
448 };
449 
450 /************************************************************************/
451 /* ==================================================================== */
452 /*                         PDFRasterBand                                */
453 /* ==================================================================== */
454 /************************************************************************/
455 
456 class PDFRasterBand CPL_NON_FINAL: public GDALPamRasterBand
457 {
458     friend class PDFDataset;
459 
460     int   nResolutionLevel;
461 
462     CPLErr IReadBlockFromTile( int, int, void * );
463 
464   public:
465 
466                 PDFRasterBand( PDFDataset *, int, int );
467     virtual ~PDFRasterBand();
468 
469 #ifdef HAVE_PDFIUM
470     virtual int    GetOverviewCount() override;
471     virtual GDALRasterBand *GetOverview( int ) override;
472 #endif  // ~ HAVE_PDFIUM
473 
474     virtual CPLErr IReadBlock( int, int, void * ) override;
475     virtual GDALColorInterp GetColorInterpretation() override;
476 
477 #ifdef notdef
478     virtual CPLErr IRasterIO( GDALRWFlag, int, int, int, int,
479                               void *, int, int, GDALDataType,
480                               GSpacing nPixelSpace, GSpacing nLineSpace,
481                               GDALRasterIOExtraArg* psExtraArg) override;
482 #endif
483 };
484 
485 #endif /* HAVE_PDF_READ_SUPPORT */
486 
487 /************************************************************************/
/*                       PDFWritableVectorDataset                       */
489 /************************************************************************/
490 
491 class PDFWritableVectorDataset final: public GDALDataset
492 {
493         char**              papszOptions;
494 
495         int                 nLayers;
496         OGRLayer          **papoLayers;
497 
498         int                 bModified;
499 
500     public:
501                             PDFWritableVectorDataset();
502         virtual ~PDFWritableVectorDataset();
503 
504         virtual OGRLayer*           ICreateLayer( const char * pszLayerName,
505                                                 OGRSpatialReference *poSRS,
506                                                 OGRwkbGeometryType eType,
507                                                 char ** papszOptions ) override;
508 
509         virtual OGRErr              SyncToDisk();
510 
511         virtual int                 GetLayerCount() override;
512         virtual OGRLayer*           GetLayer( int ) override;
513 
514         virtual int                 TestCapability( const char * ) override;
515 
516         static GDALDataset* Create( const char * pszName,
517                                  int nXSize, int nYSize, int nBands,
518                                  GDALDataType eType, char ** papszOptions );
519 
SetModified()520         void                SetModified() { bModified = TRUE; }
521 };
522 
523 GDALDataset* GDALPDFOpen(const char* pszFilename, GDALAccess eAccess);
524 CPLString PDFSanitizeLayerName(const char* pszName);
525 
526 #endif /* ndef GDAL_PDF_H_INCLUDED */
527