1 /******************************************************************************
2  *
3  * Project:  PDF driver
4  * Purpose:  GDALDataset driver for PDF dataset (read vector features)
5  * Author:   Even Rouault, <even dot rouault at spatialys.com>
6  *
7  ******************************************************************************
8  * Copyright (c) 2010-2014, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #include "gdal_pdf.h"
30 
31 #include <array>
32 
33 #define SQUARE(x) ((x)*(x))
34 #define EPSILON 1e-5
35 
36 CPL_CVSID("$Id: pdfreadvectors.cpp 6c0684aa03b8700ef7ff6329ba2bce592d453c9f 2021-08-11 14:42:51 +0200 Even Rouault $")
37 
38 #ifdef HAVE_PDF_READ_SUPPORT
39 
40 constexpr int BEZIER_STEPS = 10;
41 
42 /************************************************************************/
43 /*                        OpenVectorLayers()                            */
44 /************************************************************************/
45 
OpenVectorLayers(GDALPDFDictionary * poPageDict)46 int PDFDataset::OpenVectorLayers(GDALPDFDictionary* poPageDict)
47 {
48     if( bHasLoadedLayers )
49         return TRUE;
50     bHasLoadedLayers = TRUE;
51 
52     if( poPageDict == nullptr )
53     {
54         poPageDict = poPageObj->GetDictionary();
55         if ( poPageDict == nullptr )
56             return FALSE;
57     }
58 
59     GetCatalog();
60     if( poCatalogObject == nullptr || poCatalogObject->GetType() != PDFObjectType_Dictionary )
61         return FALSE;
62 
63     GDALPDFObject* poContents = poPageDict->Get("Contents");
64     if (poContents == nullptr)
65         return FALSE;
66 
67     if (poContents->GetType() != PDFObjectType_Dictionary &&
68         poContents->GetType() != PDFObjectType_Array)
69         return FALSE;
70 
71     GDALPDFObject* poResources = poPageDict->Get("Resources");
72     if (poResources == nullptr || poResources->GetType() != PDFObjectType_Dictionary)
73         return FALSE;
74 
75     GDALPDFObject* poStructTreeRoot = poCatalogObject->GetDictionary()->Get("StructTreeRoot");
76     if (CPLTestBool(CPLGetConfigOption("OGR_PDF_READ_NON_STRUCTURED", "NO")) ||
77         poStructTreeRoot == nullptr ||
78         poStructTreeRoot->GetType() != PDFObjectType_Dictionary)
79     {
80         ExploreContentsNonStructured(poContents, poResources);
81     }
82     else
83     {
84         int nDepth = 0;
85         int nVisited = 0;
86         bool bStop = false;
87         ExploreContents(poContents, poResources, nDepth, nVisited, bStop);
88         std::set< std::pair<int,int> > aoSetAlreadyVisited;
89         ExploreTree(poStructTreeRoot, aoSetAlreadyVisited, 0);
90     }
91 
92     CleanupIntermediateResources();
93 
94     int bEmptyDS = TRUE;
95     for(int i=0;i<nLayers;i++)
96     {
97         if (papoLayers[i]->GetFeatureCount() != 0)
98         {
99             bEmptyDS = FALSE;
100             break;
101         }
102     }
103     return !bEmptyDS;
104 }
105 
106 /************************************************************************/
107 /*                   CleanupIntermediateResources()                     */
108 /************************************************************************/
109 
CleanupIntermediateResources()110 void PDFDataset::CleanupIntermediateResources()
111 {
112     std::map<int,OGRGeometry*>::iterator oMapIter = oMapMCID.begin();
113     for( ; oMapIter != oMapMCID.end(); ++oMapIter)
114         delete oMapIter->second;
115     oMapMCID.erase(oMapMCID.begin(), oMapMCID.end());
116 }
117 
118 /************************************************************************/
119 /*                          InitMapOperators()                          */
120 /************************************************************************/
121 
/* One content-stream operator known to the reader. */
struct PDFOperator
{
    char        szOpName[4];  /* operator name, NUL-terminated (3 chars max) */
    int         nArgs;        /* number of operands registered for it;
                                 -1 is used for SC/sc/SCN/scn (presumably a
                                 variable count - handled by the consumer of
                                 this table, not visible here) */
};

/* Table of registered operators, grouped by leading letter. Operators that
 * only appear as a comment (BI, BT, d0, d1, EI, ET, ID, T*, ', ") are not
 * registered. */
static const PDFOperator asPDFOperators [] =
{
    { "b", 0 },    { "B", 0 },    { "b*", 0 },   { "B*", 0 },
    { "BDC", 2 },
    // BI
    { "BMC", 1 },
    // BT
    { "BX", 0 },

    { "c", 6 },    { "cm", 6 },
    { "CS", 1 },   { "cs", 1 },

    { "d", 1 }, /* we have ignored the first arg */
    // d0
    // d1
    { "Do", 1 },   { "DP", 2 },

    // EI
    { "EMC", 0 },
    // ET
    { "EX", 0 },

    { "f", 0 },    { "F", 0 },    { "f*", 0 },

    { "G", 1 },    { "g", 1 },    { "gs", 1 },

    { "h", 0 },

    { "i", 1 },
    // ID

    { "j", 1 },    { "J", 1 },

    { "K", 4 },    { "k", 4 },

    { "l", 2 },

    { "m", 2 },    { "M", 1 },    { "MP", 1 },

    { "n", 0 },

    { "q", 0 },    { "Q", 0 },

    { "re", 4 },   { "RG", 3 },   { "rg", 3 },   { "ri", 1 },

    { "s", 0 },    { "S", 0 },
    { "SC", -1 },  { "sc", -1 },  { "SCN", -1 }, { "scn", -1 },
    { "sh", 1 },

    // T*
    { "Tc", 1 },   { "Td", 2 },   { "TD", 2 },   { "Tf", 1 },
    { "Tj", 1 },   { "TJ", 1 },   { "TL", 1 },   { "Tm", 6 },
    { "Tr", 1 },   { "Ts", 1 },   { "Tw", 1 },   { "Tz", 1 },

    { "v", 4 },

    { "w", 1 },    { "W", 0 },    { "W*", 0 },

    { "y", 4 },
    // '
    // "
};
204 
InitMapOperators()205 void PDFDataset::InitMapOperators()
206 {
207     for(size_t i=0;i<sizeof(asPDFOperators) / sizeof(asPDFOperators[0]); i++)
208         oMapOperators[asPDFOperators[i].szOpName] = asPDFOperators[i].nArgs;
209 }
210 
211 /************************************************************************/
212 /*                           TestCapability()                           */
213 /************************************************************************/
214 
TestCapability(CPL_UNUSED const char * pszCap)215 int PDFDataset::TestCapability( CPL_UNUSED const char * pszCap )
216 {
217     return FALSE;
218 }
219 
220 /************************************************************************/
221 /*                              GetLayer()                              */
222 /************************************************************************/
223 
GetLayer(int iLayer)224 OGRLayer *PDFDataset::GetLayer( int iLayer )
225 
226 {
227     OpenVectorLayers(nullptr);
228     if (iLayer < 0 || iLayer >= nLayers)
229         return nullptr;
230 
231     return papoLayers[iLayer];
232 }
233 
234 /************************************************************************/
235 /*                            GetLayerCount()                           */
236 /************************************************************************/
237 
GetLayerCount()238 int PDFDataset::GetLayerCount()
239 {
240     OpenVectorLayers(nullptr);
241     return nLayers;
242 }
243 
244 /************************************************************************/
245 /*                            ExploreTree()                             */
246 /************************************************************************/
247 
ExploreTree(GDALPDFObject * poObj,std::set<std::pair<int,int>> aoSetAlreadyVisited,int nRecLevel)248 void PDFDataset::ExploreTree(GDALPDFObject* poObj,
249                              std::set< std::pair<int,int> > aoSetAlreadyVisited,
250                              int nRecLevel)
251 {
252     if (nRecLevel == 16)
253         return;
254 
255     std::pair<int,int> oObjPair( poObj->GetRefNum().toInt(), poObj->GetRefGen() );
256     if( aoSetAlreadyVisited.find( oObjPair ) != aoSetAlreadyVisited.end() )
257         return;
258     aoSetAlreadyVisited.insert( oObjPair );
259 
260     if (poObj->GetType() != PDFObjectType_Dictionary)
261         return;
262 
263     GDALPDFDictionary* poDict = poObj->GetDictionary();
264 
265     GDALPDFObject* poS = poDict->Get("S");
266     CPLString osS;
267     if (poS != nullptr && poS->GetType() == PDFObjectType_Name)
268     {
269         osS = poS->GetName();
270     }
271 
272     GDALPDFObject* poT = poDict->Get("T");
273     CPLString osT;
274     if (poT != nullptr && poT->GetType() == PDFObjectType_String)
275     {
276         osT = poT->GetString();
277     }
278 
279     GDALPDFObject* poK = poDict->Get("K");
280     if (poK == nullptr)
281         return;
282 
283     if (poK->GetType() == PDFObjectType_Array)
284     {
285         GDALPDFArray* poArray = poK->GetArray();
286         if (poArray->GetLength() > 0 &&
287             poArray->Get(0) &&
288             poArray->Get(0)->GetType() == PDFObjectType_Dictionary &&
289             poArray->Get(0)->GetDictionary()->Get("K") != nullptr &&
290             poArray->Get(0)->GetDictionary()->Get("K")->GetType() == PDFObjectType_Int)
291         {
292             CPLString osLayerName;
293             if (!osT.empty() )
294                 osLayerName = osT;
295             else
296             {
297                 if (!osS.empty() )
298                     osLayerName = osS;
299                 else
300                     osLayerName = CPLSPrintf("Layer%d", nLayers + 1);
301             }
302 
303             auto poSRSOri = GetSpatialRef();
304             OGRSpatialReference* poSRS = poSRSOri ? poSRSOri->Clone() : nullptr;
305             OGRPDFLayer* poLayer =
306                 new OGRPDFLayer(this, osLayerName.c_str(), poSRS, wkbUnknown);
307             if( poSRS )
308                 poSRS->Release();
309 
310             poLayer->Fill(poArray);
311 
312             papoLayers = (OGRLayer**)
313                 CPLRealloc(papoLayers, (nLayers + 1) * sizeof(OGRLayer*));
314             papoLayers[nLayers] = poLayer;
315             nLayers ++;
316         }
317         else
318         {
319             for(int i=0;i<poArray->GetLength();i++)
320             {
321                 auto poSubObj = poArray->Get(i);
322                 if (poSubObj )
323                 {
324                     ExploreTree(poSubObj, aoSetAlreadyVisited,
325                                 nRecLevel + 1);
326                 }
327             }
328         }
329     }
330     else if (poK->GetType() == PDFObjectType_Dictionary)
331     {
332         ExploreTree(poK, aoSetAlreadyVisited, nRecLevel + 1);
333     }
334 }
335 
336 /************************************************************************/
337 /*                        GetGeometryFromMCID()                         */
338 /************************************************************************/
339 
GetGeometryFromMCID(int nMCID)340 OGRGeometry* PDFDataset::GetGeometryFromMCID(int nMCID)
341 {
342     std::map<int,OGRGeometry*>::iterator oMapIter = oMapMCID.find(nMCID);
343     if (oMapIter != oMapMCID.end())
344         return oMapIter->second;
345     else
346         return nullptr;
347 }
348 
349 /************************************************************************/
350 /*                            GraphicState                              */
351 /************************************************************************/
352 
/**
 * Graphic state: a transformation matrix plus a stroke and a fill color.
 *
 * A matrix is stored as 6 values [a b c d e f] standing for
 *     [ a b 0 ]
 *     [ c d 0 ]
 *     [ e f 1 ]
 * and is applied to a point seen as a row vector:
 *     [x' y' 1] = [x y 1] * M
 */
class GraphicState
{
    public:
        /* Current transformation matrix; identity by default. */
        std::array<double,6> adfCM{{1.0, 0.0, 0.0, 1.0, 0.0, 0.0}};
        /* Stroke color, 3 components, all 0.0 by default. */
        std::array<double,3> adfStrokeColor{{0.0, 0.0, 0.0}};
        /* Fill color, 3 components, all 1.0 by default. */
        std::array<double,3> adfFillColor{{1.0, 1.0, 1.0}};

        GraphicState() = default;

        /**
         * Concatenate adfMatrix with the current transformation matrix.
         *
         * The PDF specification states that the matrix of an additional
         * transformation is PRE-multiplied with the existing one:
         *     CM' = adfMatrix * CM
         * so that a point goes through adfMatrix first and through the
         * previous matrix afterwards. Multiplying in the other order gives a
         * different result as soon as the two matrices do not commute
         * (e.g. a translation followed by a scaling).
         *
         * NOTE(review): the call sites are outside this block; this assumes
         * the method is only fed the operand of the "cm" operator - confirm.
         *
         * @param adfMatrix the 6 coefficients [a b c d e f] to concatenate.
         */
        void MultiplyBy(const double adfMatrix[6])
        {
            /*
            [ a b 0 ]     [ a' b' 0]     [ aa' + bc'       ab' + bd'       0 ]
            [ c d 0 ]  *  [ c' d' 0]  =  [ ca' + dc'       cb' + dd'       0 ]
            [ e f 1 ]     [ e' f' 1]     [ ea' + fc' + e'  eb' + fd' + f'  1 ]

            with (a..f) = adfMatrix and (a'..f') = current matrix.
            */
            const double a = adfMatrix[0];
            const double b = adfMatrix[1];
            const double c = adfMatrix[2];
            const double d = adfMatrix[3];
            const double e = adfMatrix[4];
            const double f = adfMatrix[5];
            const double ap = adfCM[0];
            const double bp = adfCM[1];
            const double cp = adfCM[2];
            const double dp = adfCM[3];
            const double ep = adfCM[4];
            const double fp = adfCM[5];
            adfCM[0] = a*ap + b*cp;
            adfCM[1] = a*bp + b*dp;
            adfCM[2] = c*ap + d*cp;
            adfCM[3] = c*bp + d*dp;
            adfCM[4] = e*ap + f*cp + ep;
            adfCM[5] = e*bp + f*dp + fp;
        }

        /**
         * Transform a point in place with the current matrix.
         *
         * @param adfCoords [x, y] on input, [x', y'] on output.
         */
        void ApplyMatrix(double adfCoords[2]) const
        {
            const double x = adfCoords[0];
            const double y = adfCoords[1];

            adfCoords[0] = x * adfCM[0] + y * adfCM[2] + adfCM[4];
            adfCoords[1] = x * adfCM[1] + y * adfCM[3] + adfCM[5];
        }
};
413 
414 /************************************************************************/
415 /*                         PDFCoordsToSRSCoords()                       */
416 /************************************************************************/
417 
PDFCoordsToSRSCoords(double x,double y,double & X,double & Y)418 void PDFDataset::PDFCoordsToSRSCoords(double x, double y,
419                                             double& X, double &Y)
420 {
421     x = x / dfPageWidth * nRasterXSize;
422     if( bGeoTransformValid )
423         y = (1 - y / dfPageHeight) * nRasterYSize;
424     else
425         y = (y / dfPageHeight) * nRasterYSize;
426 
427     X = adfGeoTransform[0] + x * adfGeoTransform[1] + y * adfGeoTransform[2];
428     Y = adfGeoTransform[3] + x * adfGeoTransform[4] + y * adfGeoTransform[5];
429 
430     if( fabs(X - (int)floor(X + 0.5)) < 1e-8 )
431         X = (int)floor(X + 0.5);
432     if( fabs(Y - (int)floor(Y + 0.5)) < 1e-8 )
433         Y = (int)floor(Y + 0.5);
434 }
435 
436 /************************************************************************/
437 /*                         PDFGetCircleCenter()                         */
438 /************************************************************************/
439 
440 /* Return the center of a circle, or NULL if it is not recognized */
441 
PDFGetCircleCenter(OGRLineString * poLS)442 static OGRPoint* PDFGetCircleCenter(OGRLineString* poLS)
443 {
444     if (poLS == nullptr || poLS->getNumPoints() != 1 + 4 * BEZIER_STEPS)
445         return nullptr;
446 
447     if (poLS->getY(0 * BEZIER_STEPS) == poLS->getY(2 * BEZIER_STEPS) &&
448         poLS->getX(1 * BEZIER_STEPS) == poLS->getX(3 * BEZIER_STEPS) &&
449         fabs((poLS->getX(0 * BEZIER_STEPS) + poLS->getX(2 * BEZIER_STEPS)) / 2 - poLS->getX(1 * BEZIER_STEPS)) < EPSILON &&
450         fabs((poLS->getY(1 * BEZIER_STEPS) + poLS->getY(3 * BEZIER_STEPS)) / 2 - poLS->getY(0 * BEZIER_STEPS)) < EPSILON)
451     {
452         return new OGRPoint((poLS->getX(0 * BEZIER_STEPS) + poLS->getX(2 * BEZIER_STEPS)) / 2,
453                             (poLS->getY(1 * BEZIER_STEPS) + poLS->getY(3 * BEZIER_STEPS)) / 2);
454     }
455     return nullptr;
456 }
457 
458 /************************************************************************/
459 /*                         PDFGetSquareCenter()                         */
460 /************************************************************************/
461 
462 /* Return the center of a square, or NULL if it is not recognized */
463 
PDFGetSquareCenter(OGRLineString * poLS)464 static OGRPoint* PDFGetSquareCenter(OGRLineString* poLS)
465 {
466     if (poLS == nullptr || poLS->getNumPoints() < 4 || poLS->getNumPoints() > 5)
467         return nullptr;
468 
469     if (poLS->getX(0) == poLS->getX(3) &&
470         poLS->getY(0) == poLS->getY(1) &&
471         poLS->getX(1) == poLS->getX(2) &&
472         poLS->getY(2) == poLS->getY(3) &&
473         fabs(fabs(poLS->getX(0) - poLS->getX(1)) - fabs(poLS->getY(0) - poLS->getY(3))) < EPSILON)
474     {
475         return new OGRPoint((poLS->getX(0) + poLS->getX(1)) / 2,
476                             (poLS->getY(0) + poLS->getY(3)) / 2);
477     }
478     return nullptr;
479 }
480 
481 /************************************************************************/
482 /*                        PDFGetTriangleCenter()                        */
483 /************************************************************************/
484 
485 /* Return the center of a equilateral triangle, or NULL if it is not recognized */
486 
PDFGetTriangleCenter(OGRLineString * poLS)487 static OGRPoint* PDFGetTriangleCenter(OGRLineString* poLS)
488 {
489     if (poLS == nullptr || poLS->getNumPoints() < 3 || poLS->getNumPoints() > 4)
490         return nullptr;
491 
492     double dfSqD1 = SQUARE(poLS->getX(0) - poLS->getX(1)) + SQUARE(poLS->getY(0) - poLS->getY(1));
493     double dfSqD2 = SQUARE(poLS->getX(1) - poLS->getX(2)) + SQUARE(poLS->getY(1) - poLS->getY(2));
494     double dfSqD3 = SQUARE(poLS->getX(0) - poLS->getX(2)) + SQUARE(poLS->getY(0) - poLS->getY(2));
495     if (fabs(dfSqD1 - dfSqD2) < EPSILON && fabs(dfSqD2 - dfSqD3) < EPSILON)
496     {
497         return new OGRPoint((poLS->getX(0) + poLS->getX(1) + poLS->getX(2)) / 3,
498                             (poLS->getY(0) + poLS->getY(1) + poLS->getY(2)) / 3);
499     }
500     return nullptr;
501 }
502 
503 /************************************************************************/
504 /*                          PDFGetStarCenter()                          */
505 /************************************************************************/
506 
507 /* Return the center of a 5-point star, or NULL if it is not recognized */
508 
PDFGetStarCenter(OGRLineString * poLS)509 static OGRPoint* PDFGetStarCenter(OGRLineString* poLS)
510 {
511     if (poLS == nullptr || poLS->getNumPoints() < 10 || poLS->getNumPoints() > 11)
512         return nullptr;
513 
514     double dfSqD01 = SQUARE(poLS->getX(0) - poLS->getX(1)) +
515                      SQUARE(poLS->getY(0) - poLS->getY(1));
516     double dfSqD02 = SQUARE(poLS->getX(0) - poLS->getX(2)) +
517                        SQUARE(poLS->getY(0) - poLS->getY(2));
518     double dfSqD13 = SQUARE(poLS->getX(1) - poLS->getX(3)) +
519                       SQUARE(poLS->getY(1) - poLS->getY(3));
520     const double dfSin18divSin126 = 0.38196601125;
521     if( dfSqD02 == 0 )
522         return nullptr;
523     int bOK = fabs(dfSqD13 / dfSqD02 - SQUARE(dfSin18divSin126)) < EPSILON;
524     for(int i=1;i<10 && bOK;i++)
525     {
526         double dfSqDiip1 = SQUARE(poLS->getX(i) - poLS->getX((i+1)%10)) +
527                            SQUARE(poLS->getY(i) - poLS->getY((i+1)%10));
528         if (fabs(dfSqDiip1 - dfSqD01) > EPSILON)
529         {
530             bOK = FALSE;
531         }
532         double dfSqDiip2 = SQUARE(poLS->getX(i) - poLS->getX((i+2)%10)) +
533                            SQUARE(poLS->getY(i) - poLS->getY((i+2)%10));
534         if ( (i%2) == 1 && fabs(dfSqDiip2 - dfSqD13) > EPSILON )
535         {
536             bOK = FALSE;
537         }
538         if ( (i%2) == 0 && fabs(dfSqDiip2 - dfSqD02) > EPSILON )
539         {
540             bOK = FALSE;
541         }
542     }
543     if (bOK)
544     {
545         return new OGRPoint((poLS->getX(0) + poLS->getX(2) + poLS->getX(4) +
546                              poLS->getX(6) + poLS->getX(8)) / 5,
547                             (poLS->getY(0) + poLS->getY(2) + poLS->getY(4) +
548                              poLS->getY(6) + poLS->getY(8)) / 5);
549     }
550     return nullptr;
551 }
552 
553 /************************************************************************/
554 /*                            UnstackTokens()                           */
555 /************************************************************************/
556 
UnstackTokens(const char * pszToken,int nRequiredArgs,char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE],int & nTokenStackSize,double * adfCoords)557 int PDFDataset::UnstackTokens(const char* pszToken,
558                                     int nRequiredArgs,
559                                     char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE],
560                                     int& nTokenStackSize,
561                                     double* adfCoords)
562 {
563     if (nTokenStackSize < nRequiredArgs)
564     {
565         CPLDebug("PDF", "not enough arguments for %s", pszToken);
566         return FALSE;
567     }
568     nTokenStackSize -= nRequiredArgs;
569     for(int i=0;i<nRequiredArgs;i++)
570     {
571         adfCoords[i] = CPLAtof(aszTokenStack[nTokenStackSize+i]);
572     }
573     return TRUE;
574 }
575 
576 /************************************************************************/
577 /*                           AddBezierCurve()                           */
578 /************************************************************************/
579 
AddBezierCurve(std::vector<double> & oCoords,const double * x0_y0,const double * x1_y1,const double * x2_y2,const double * x3_y3)580 static void AddBezierCurve(std::vector<double>& oCoords,
581                            const double* x0_y0,
582                            const double* x1_y1,
583                            const double* x2_y2,
584                            const double* x3_y3)
585 {
586     double x0 = x0_y0[0];
587     double y0 = x0_y0[1];
588     double x1 = x1_y1[0];
589     double y1 = x1_y1[1];
590     double x2 = x2_y2[0];
591     double y2 = x2_y2[1];
592     double x3 = x3_y3[0];
593     double y3 = x3_y3[1];
594     for( int i = 1; i < BEZIER_STEPS; i++ )
595     {
596         const double t = static_cast<double>(i) / BEZIER_STEPS;
597         const double t2 = t * t;
598         const double t3 = t2 * t;
599         const double oneMinust = 1 - t;
600         const double oneMinust2 = oneMinust * oneMinust;
601         const double oneMinust3 = oneMinust2 * oneMinust;
602         const double three_t_oneMinust = 3 * t * oneMinust;
603         const double x = oneMinust3 * x0 + three_t_oneMinust * (oneMinust * x1 + t * x2) + t3 * x3;
604         const double y = oneMinust3 * y0 + three_t_oneMinust * (oneMinust * y1 + t * y2) + t3 * y3;
605         oCoords.push_back(x);
606         oCoords.push_back(y);
607     }
608     oCoords.push_back(x3);
609     oCoords.push_back(y3);
610 }
611 
612 /************************************************************************/
613 /*                           ParseContent()                             */
614 /************************************************************************/
615 
616 #define NEW_SUBPATH -99
617 #define CLOSE_SUBPATH -98
618 #define FILL_SUBPATH -97
619 
ParseContent(const char * pszContent,GDALPDFObject * poResources,int bInitBDCStack,int bMatchQ,std::map<CPLString,OGRPDFLayer * > & oMapPropertyToLayer,OGRPDFLayer * poCurLayer)620 OGRGeometry* PDFDataset::ParseContent(const char* pszContent,
621                                             GDALPDFObject* poResources,
622                                             int bInitBDCStack,
623                                             int bMatchQ,
624                                             std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer,
625                                             OGRPDFLayer* poCurLayer)
626 {
627 
628 #define PUSH(aszTokenStack, str, strlen) \
629     do \
630     { \
631         if(nTokenStackSize < TOKEN_STACK_SIZE) \
632             memcpy(aszTokenStack[nTokenStackSize ++], str, strlen + 1); \
633         else \
634         { \
635             CPLError(CE_Failure, CPLE_AppDefined, \
636                      "Max token stack size reached"); \
637             return nullptr; \
638         }; \
639     } while( false )
640 
641 #define ADD_CHAR(szToken, c) \
642     do \
643     { \
644         if(nTokenSize < MAX_TOKEN_SIZE-1) \
645         { \
646             szToken[nTokenSize ++ ] = c; \
647             szToken[nTokenSize ] = '\0'; \
648         } \
649         else \
650         { \
651             CPLError(CE_Failure, CPLE_AppDefined, "Max token size reached");\
652             return nullptr; \
653         }; \
654     } while( false )
655 
656     char szToken[MAX_TOKEN_SIZE];
657     int nTokenSize = 0;
658     char ch;
659     char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE];
660     int nTokenStackSize = 0;
661     int bInString = FALSE;
662     int nBDCLevel = 0;
663     int nParenthesisLevel = 0;
664     int nArrayLevel = 0;
665     int nBTLevel = 0;
666 
667     int bCollectAllObjects = poResources != nullptr && !bInitBDCStack && !bMatchQ;
668 
669     GraphicState oGS;
670     std::stack<GraphicState> oGSStack;
671     std::stack<OGRPDFLayer*> oLayerStack;
672 
673     std::vector<double> oCoords;
674     int bHasFoundFill = FALSE;
675     int bHasMultiPart = FALSE;
676 
677     szToken[0] = '\0';
678 
679     if (bInitBDCStack)
680     {
681         PUSH(aszTokenStack, "dummy", 5);
682         PUSH(aszTokenStack, "dummy", 5);
683         oLayerStack.push(nullptr);
684     }
685 
686     while((ch = *pszContent) != '\0')
687     {
688         int bPushToken = FALSE;
689 
690         if (!bInString && ch == '%')
691         {
692             /* Skip comments until end-of-line */
693             while((ch = *pszContent) != '\0')
694             {
695                 if (ch == '\r' || ch == '\n')
696                     break;
697                 pszContent ++;
698             }
699             if (ch == 0)
700                 break;
701         }
702         else if (!bInString && (ch == ' ' || ch == '\r' || ch == '\n'))
703         {
704             bPushToken = TRUE;
705         }
706 
707         /* Ignore arrays */
708         else if (!bInString && nTokenSize == 0 && ch == '[')
709         {
710             nArrayLevel ++;
711         }
712         else if (!bInString && nArrayLevel && ch == ']')
713         {
714             nArrayLevel --;
715         }
716 
717         else if (!bInString && nTokenSize == 0 && ch == '(')
718         {
719             bInString = TRUE;
720             nParenthesisLevel ++;
721             ADD_CHAR(szToken, ch);
722         }
723         else if (bInString && ch == '(')
724         {
725             nParenthesisLevel ++;
726             ADD_CHAR(szToken, ch);
727         }
728         else if (bInString && ch == ')')
729         {
730             nParenthesisLevel --;
731             ADD_CHAR(szToken, ch);
732             if (nParenthesisLevel == 0)
733             {
734                 bInString = FALSE;
735                 bPushToken = TRUE;
736             }
737         }
738         else if( bInString && ch == '\\' )
739         {
740             const auto nextCh = pszContent[1];
741             if( nextCh == 'n' )
742             {
743                 ADD_CHAR(szToken, '\n');
744                 pszContent ++;
745             }
746             else if( nextCh == 'r' )
747             {
748                 ADD_CHAR(szToken, '\r');
749                 pszContent ++;
750             }
751             else if( nextCh == 't' )
752             {
753                 ADD_CHAR(szToken, '\t');
754                 pszContent ++;
755             }
756             else if( nextCh == 'b' )
757             {
758                 ADD_CHAR(szToken, '\b');
759                 pszContent ++;
760             }
761             else if( nextCh == '(' || nextCh == ')' || nextCh == '\\' )
762             {
763                 ADD_CHAR(szToken, nextCh);
764                 pszContent ++;
765             }
766             else if( nextCh >= '0' && nextCh <= '7' &&
767                      pszContent[2] >= '0' && pszContent[2] <= '7' &&
768                      pszContent[3] >= '0' && pszContent[3] <= '7' )
769             {
770                 ADD_CHAR(szToken,
771                          ((nextCh - '\0') * 64 + (pszContent[2] - '\0') * 8 + pszContent[3] - '\0'));
772                 pszContent += 3;
773             }
774             else if( nextCh == '\n' )
775             {
776                 if( pszContent[2] == '\r' )
777                     pszContent += 2;
778                 else
779                     pszContent ++;
780             }
781             else if( nextCh == '\r' )
782             {
783                 pszContent ++;
784             }
785         }
786         else if (ch == '<' && pszContent[1] == '<' && nTokenSize == 0)
787         {
788             int nDictDepth = 0;
789 
790             while(*pszContent != '\0')
791             {
792                 if (pszContent[0] == '<' && pszContent[1] == '<')
793                 {
794                     ADD_CHAR(szToken, '<');
795                     ADD_CHAR(szToken, '<');
796                     nDictDepth ++;
797                     pszContent += 2;
798                 }
799                 else if (pszContent[0] == '>' && pszContent[1] == '>')
800                 {
801                     ADD_CHAR(szToken, '>');
802                     ADD_CHAR(szToken, '>');
803                     nDictDepth --;
804                     pszContent += 2;
805                     if (nDictDepth == 0)
806                         break;
807                 }
808                 else
809                 {
810                     ADD_CHAR(szToken, *pszContent);
811                     pszContent ++;
812                 }
813             }
814             if (nDictDepth == 0)
815             {
816                 bPushToken = TRUE;
817                 pszContent --;
818             }
819             else
820                 break;
821         }
822         else
823         {
824             // Do not create too long tokens in arrays, that we will ignore
825             // anyway
826             if( nArrayLevel == 0 || nTokenSize == 0 )
827             {
828                 ADD_CHAR(szToken, ch);
829             }
830         }
831 
832         pszContent ++;
833         if (pszContent[0] == '\0')
834             bPushToken = TRUE;
835 
836 #define EQUAL1(szToken, s) (szToken[0] == s[0] && szToken[1] == '\0')
837 #define EQUAL2(szToken, s) (szToken[0] == s[0] && szToken[1] == s[1] && szToken[2] == '\0')
838 #define EQUAL3(szToken, s) (szToken[0] == s[0] && szToken[1] == s[1] && szToken[2] == s[2] && szToken[3] == '\0')
839 
840         if (bPushToken && nTokenSize)
841         {
842             if (EQUAL2(szToken, "BI"))
843             {
844                 while(*pszContent != '\0')
845                 {
846                     if( pszContent[0] == 'E' && pszContent[1] == 'I' && pszContent[2] == ' ' )
847                     {
848                         break;
849                     }
850                     pszContent ++;
851                 }
852                 if( pszContent[0] == 'E' )
853                     pszContent += 3;
854                 else
855                     return nullptr;
856             }
857             else if (EQUAL3(szToken, "BDC"))
858             {
859                 if (nTokenStackSize < 2)
860                 {
861                     CPLDebug("PDF",
862                                 "not enough arguments for %s",
863                                 szToken);
864                     return nullptr;
865                 }
866                 nTokenStackSize -= 2;
867                 const char* pszOC = aszTokenStack[nTokenStackSize];
868                 const char* pszOCGName = aszTokenStack[nTokenStackSize+1];
869 
870                 nBDCLevel ++;
871 
872                 if( EQUAL3(pszOC, "/OC") && pszOCGName[0] == '/' )
873                 {
874                     std::map<CPLString, OGRPDFLayer*>::iterator oIter =
875                         oMapPropertyToLayer.find(pszOCGName + 1);
876                     if( oIter != oMapPropertyToLayer.end() )
877                     {
878                         poCurLayer = oIter->second;
879                         //CPLDebug("PDF", "Cur layer : %s", poCurLayer->GetName());
880                     }
881                 }
882 
883                 oLayerStack.push(poCurLayer);
884                 //CPLDebug("PDF", "%s %s BDC", osOC.c_str(), osOCGName.c_str());
885             }
886             else if (EQUAL3(szToken, "EMC"))
887             {
888                 //CPLDebug("PDF", "EMC");
889                 if( !oLayerStack.empty() )
890                 {
891                     oLayerStack.pop();
892                     if( !oLayerStack.empty() )
893                         poCurLayer = oLayerStack.top();
894                     else
895                         poCurLayer = nullptr;
896 
897                     /*if (poCurLayer)
898                     {
899                         CPLDebug("PDF", "Cur layer : %s", poCurLayer->GetName());
900                     }*/
901                 }
902                 else
903                 {
904                     CPLDebug("PDF", "Should not happen at line %d", __LINE__);
905                     poCurLayer = nullptr;
906                     //return NULL;
907                 }
908 
909                 nBDCLevel --;
910                 if (nBDCLevel == 0 && bInitBDCStack)
911                     break;
912             }
913 
914             /* Ignore any text stuff */
915             else if (EQUAL2(szToken, "BT"))
916                 nBTLevel ++;
917             else if (EQUAL2(szToken, "ET"))
918             {
919                 nBTLevel --;
920                 if (nBTLevel < 0)
921                 {
922                     CPLDebug("PDF", "Should not happen at line %d", __LINE__);
923                     return nullptr;
924                 }
925             }
926             else if (!nArrayLevel && !nBTLevel)
927             {
928                 int bEmitFeature = FALSE;
929 
930                 if( szToken[0] < 'A' )
931                 {
932                     PUSH(aszTokenStack, szToken, nTokenSize);
933                 }
934                 else if (EQUAL1(szToken, "q"))
935                 {
936                     oGSStack.push(oGS);
937                 }
938                 else if (EQUAL1(szToken, "Q"))
939                 {
940                     if (oGSStack.empty())
941                     {
942                         CPLDebug("PDF", "not enough arguments for %s", szToken);
943                         return nullptr;
944                     }
945 
946                     oGS = oGSStack.top();
947                     oGSStack.pop();
948 
949                     if (oGSStack.empty() && bMatchQ)
950                         break;
951                 }
952                 else if (EQUAL2(szToken, "cm"))
953                 {
954                     double adfMatrix[6];
955                     if (!UnstackTokens(szToken, 6, aszTokenStack, nTokenStackSize, adfMatrix))
956                     {
957                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
958                         return nullptr;
959                     }
960 
961                     oGS.MultiplyBy(adfMatrix);
962                 }
963                 else if (EQUAL1(szToken, "b") || /* closepath, fill, stroke */
964                          EQUAL2(szToken, "b*")   /* closepath, eofill, stroke */)
965                 {
966                     if (!(!oCoords.empty() &&
967                           oCoords[oCoords.size() - 2] == CLOSE_SUBPATH &&
968                           oCoords.back() == CLOSE_SUBPATH))
969                     {
970                         oCoords.push_back(CLOSE_SUBPATH);
971                         oCoords.push_back(CLOSE_SUBPATH);
972                     }
973                     oCoords.push_back(FILL_SUBPATH);
974                     oCoords.push_back(FILL_SUBPATH);
975                     bHasFoundFill = TRUE;
976 
977                     bEmitFeature = TRUE;
978                 }
979                 else if (EQUAL1(szToken, "B") ||  /* fill, stroke */
980                          EQUAL2(szToken, "B*") || /* eofill, stroke */
981                          EQUAL1(szToken, "f") ||  /* fill */
982                          EQUAL1(szToken, "F") ||  /* fill */
983                          EQUAL2(szToken, "f*")    /* eofill */ )
984                 {
985                     oCoords.push_back(FILL_SUBPATH);
986                     oCoords.push_back(FILL_SUBPATH);
987                     bHasFoundFill = TRUE;
988 
989                     bEmitFeature = TRUE;
990                 }
991                 else if (EQUAL1(szToken, "h")) /* close subpath */
992                 {
993                     if (!(!oCoords.empty() &&
994                           oCoords[oCoords.size() - 2] == CLOSE_SUBPATH &&
995                           oCoords.back() == CLOSE_SUBPATH))
996                     {
997                         oCoords.push_back(CLOSE_SUBPATH);
998                         oCoords.push_back(CLOSE_SUBPATH);
999                     }
1000                 }
1001                 else if (EQUAL1(szToken, "n")) /* new subpath without stroking or filling */
1002                 {
1003                     oCoords.resize(0);
1004                 }
1005                 else if (EQUAL1(szToken, "s")) /* close and stroke */
1006                 {
1007                     if (!(!oCoords.empty() &&
1008                           oCoords[oCoords.size() - 2] == CLOSE_SUBPATH &&
1009                           oCoords.back() == CLOSE_SUBPATH))
1010                     {
1011                         oCoords.push_back(CLOSE_SUBPATH);
1012                         oCoords.push_back(CLOSE_SUBPATH);
1013                     }
1014 
1015                     bEmitFeature = TRUE;
1016                 }
1017                 else if (EQUAL1(szToken, "S")) /* stroke */
1018                 {
1019                     bEmitFeature = TRUE;
1020                 }
1021                 else if (EQUAL1(szToken, "m") || EQUAL1(szToken, "l"))
1022                 {
1023                     double adfCoords[2];
1024                     if (!UnstackTokens(szToken, 2, aszTokenStack, nTokenStackSize, adfCoords))
1025                     {
1026                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1027                         return nullptr;
1028                     }
1029 
1030                     if (EQUAL1(szToken, "m"))
1031                     {
1032                         if (!oCoords.empty())
1033                             bHasMultiPart = TRUE;
1034                         oCoords.push_back(NEW_SUBPATH);
1035                         oCoords.push_back(NEW_SUBPATH);
1036                     }
1037 
1038                     oGS.ApplyMatrix(adfCoords);
1039                     oCoords.push_back(adfCoords[0]);
1040                     oCoords.push_back(adfCoords[1]);
1041                 }
1042                 else if (EQUAL1(szToken, "c")) /* Bezier curve */
1043                 {
1044                     double adfCoords[6];
1045                     if (!UnstackTokens(szToken, 6, aszTokenStack, nTokenStackSize, adfCoords))
1046                     {
1047                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1048                         return nullptr;
1049                     }
1050 
1051                     oGS.ApplyMatrix(adfCoords + 0);
1052                     oGS.ApplyMatrix(adfCoords + 2);
1053                     oGS.ApplyMatrix(adfCoords + 4);
1054                     AddBezierCurve(oCoords,
1055                                    oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1056                                    &adfCoords[0],
1057                                    &adfCoords[2],
1058                                    &adfCoords[4]);
1059                 }
1060                 else if (EQUAL1(szToken, "v")) /* Bezier curve */
1061                 {
1062                     double adfCoords[4];
1063                     if (!UnstackTokens(szToken, 4, aszTokenStack, nTokenStackSize, adfCoords))
1064                     {
1065                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1066                         return nullptr;
1067                     }
1068 
1069                     oGS.ApplyMatrix(adfCoords + 0);
1070                     oGS.ApplyMatrix(adfCoords + 2);
1071                     AddBezierCurve(oCoords,
1072                                    oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1073                                    oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1074                                    &adfCoords[0],
1075                                    &adfCoords[2]);
1076                 }
1077                 else if (EQUAL1(szToken, "y")) /* Bezier curve */
1078                 {
1079                     double adfCoords[4];
1080                     if (!UnstackTokens(szToken, 4, aszTokenStack, nTokenStackSize, adfCoords))
1081                     {
1082                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1083                         return nullptr;
1084                     }
1085 
1086                     oGS.ApplyMatrix(adfCoords + 0);
1087                     oGS.ApplyMatrix(adfCoords + 2);
1088                     AddBezierCurve(oCoords,
1089                                    oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1090                                    &adfCoords[0],
1091                                    &adfCoords[2],
1092                                    &adfCoords[2]);
1093                 }
1094                 else if (EQUAL2(szToken, "re")) /* Rectangle */
1095                 {
1096                     double adfCoords[4];
1097                     if (!UnstackTokens(szToken, 4, aszTokenStack, nTokenStackSize, adfCoords))
1098                     {
1099                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1100                         return nullptr;
1101                     }
1102 
1103                     adfCoords[2] += adfCoords[0];
1104                     adfCoords[3] += adfCoords[1];
1105 
1106                     oGS.ApplyMatrix(adfCoords);
1107                     oGS.ApplyMatrix(adfCoords + 2);
1108 
1109                     if (!oCoords.empty())
1110                         bHasMultiPart = TRUE;
1111                     oCoords.push_back(NEW_SUBPATH);
1112                     oCoords.push_back(NEW_SUBPATH);
1113                     oCoords.push_back(adfCoords[0]);
1114                     oCoords.push_back(adfCoords[1]);
1115                     oCoords.push_back(adfCoords[2]);
1116                     oCoords.push_back(adfCoords[1]);
1117                     oCoords.push_back(adfCoords[2]);
1118                     oCoords.push_back(adfCoords[3]);
1119                     oCoords.push_back(adfCoords[0]);
1120                     oCoords.push_back(adfCoords[3]);
1121                     oCoords.push_back(CLOSE_SUBPATH);
1122                     oCoords.push_back(CLOSE_SUBPATH);
1123                 }
1124 
1125                 else if (EQUAL2(szToken, "Do"))
1126                 {
1127                     if (nTokenStackSize == 0)
1128                     {
1129                         CPLDebug("PDF",
1130                                  "not enough arguments for %s",
1131                                  szToken);
1132                         return nullptr;
1133                     }
1134 
1135                     CPLString osObjectName = aszTokenStack[--nTokenStackSize];
1136 
1137                     if (osObjectName[0] != '/')
1138                     {
1139                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1140                         return nullptr;
1141                     }
1142 
1143                     if (poResources == nullptr)
1144                     {
1145                         if (osObjectName.find("/SymImage") == 0)
1146                         {
1147                             oCoords.push_back(oGS.adfCM[4] + oGS.adfCM[0] / 2);
1148                             oCoords.push_back(oGS.adfCM[5] + oGS.adfCM[3] / 2);
1149 
1150                             szToken[0] = '\0';
1151                             nTokenSize = 0;
1152 
1153                             if( poCurLayer != nullptr)
1154                                 bEmitFeature = TRUE;
1155                             else
1156                                 continue;
1157                         }
1158                         else
1159                         {
1160                             //CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1161                             return nullptr;
1162                         }
1163                     }
1164 
1165                     if( !bEmitFeature )
1166                     {
1167                         GDALPDFObject* poXObject =
1168                             poResources->GetDictionary()->Get("XObject");
1169                         if (poXObject == nullptr ||
1170                             poXObject->GetType() != PDFObjectType_Dictionary)
1171                         {
1172                             CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1173                             return nullptr;
1174                         }
1175 
1176                         GDALPDFObject* poObject =
1177                             poXObject->GetDictionary()->Get(osObjectName.c_str() + 1);
1178                         if (poObject == nullptr)
1179                         {
1180                             CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1181                             return nullptr;
1182                         }
1183 
1184                         int bParseStream = TRUE;
1185                         /* Check if the object is an image. If so, no need to try to parse */
1186                         /* it. */
1187                         if (poObject->GetType() == PDFObjectType_Dictionary)
1188                         {
1189                             GDALPDFObject* poSubtype = poObject->GetDictionary()->Get("Subtype");
1190                             if (poSubtype != nullptr &&
1191                                 poSubtype->GetType() == PDFObjectType_Name &&
1192                                 poSubtype->GetName() == "Image" )
1193                             {
1194                                 bParseStream = FALSE;
1195                             }
1196                         }
1197 
1198                         if( bParseStream )
1199                         {
1200                             GDALPDFStream* poStream = poObject->GetStream();
1201                             if (!poStream)
1202                             {
1203                                 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1204                                 return nullptr;
1205                             }
1206 
1207                             char* pszStr = poStream->GetBytes();
1208                             if( pszStr )
1209                             {
1210                                 OGRGeometry* poGeom = ParseContent(pszStr, nullptr, FALSE, FALSE,
1211                                                                 oMapPropertyToLayer, poCurLayer);
1212                                 CPLFree(pszStr);
1213                                 if (poGeom && !bCollectAllObjects)
1214                                     return poGeom;
1215                                 delete poGeom;
1216                             }
1217                         }
1218                     }
1219                 }
1220                 else if( EQUAL2(szToken, "RG") || EQUAL2(szToken, "rg") )
1221                 {
1222                     double* padf = ( EQUAL2(szToken, "RG") ) ? &oGS.adfStrokeColor[0] : &oGS.adfFillColor[0];
1223                     if (!UnstackTokens(szToken, 3, aszTokenStack, nTokenStackSize, padf))
1224                     {
1225                         CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1226                         return nullptr;
1227                     }
1228                 }
1229                 else if (oMapOperators.find(szToken) != oMapOperators.end())
1230                 {
1231                     int nArgs = oMapOperators[szToken];
1232                     if (nArgs < 0)
1233                     {
1234                         while( nTokenStackSize != 0 )
1235                         {
1236                             CPLString osTopToken = aszTokenStack[--nTokenStackSize];
1237                             if (oMapOperators.find(osTopToken) != oMapOperators.end())
1238                                 break;
1239                         }
1240                     }
1241                     else
1242                     {
1243                         if( nArgs > nTokenStackSize )
1244                         {
1245                             CPLDebug("PDF",
1246                                     "not enough arguments for %s",
1247                                     szToken);
1248                             return nullptr;
1249                         }
1250                         nTokenStackSize -= nArgs;
1251                     }
1252                 }
1253                 else
1254                 {
1255                     PUSH(aszTokenStack, szToken, nTokenSize);
1256                 }
1257 
1258                 if( bEmitFeature && poCurLayer != nullptr)
1259                 {
1260                     OGRGeometry* poGeom = BuildGeometry(oCoords, bHasFoundFill, bHasMultiPart);
1261                     bHasFoundFill = FALSE;
1262                     bHasMultiPart = FALSE;
1263                     if (poGeom)
1264                     {
1265                         OGRFeature* poFeature = new OGRFeature(poCurLayer->GetLayerDefn());
1266                         if( bSetStyle )
1267                         {
1268                             OGRwkbGeometryType eType = wkbFlatten(poGeom->getGeometryType());
1269                             if( eType == wkbLineString || eType == wkbMultiLineString )
1270                             {
1271                                 poFeature->SetStyleString(CPLSPrintf("PEN(c:#%02X%02X%02X)",
1272                                                                     (int)(oGS.adfStrokeColor[0] * 255 + 0.5),
1273                                                                     (int)(oGS.adfStrokeColor[1] * 255 + 0.5),
1274                                                                     (int)(oGS.adfStrokeColor[2] * 255 + 0.5)));
1275                             }
1276                             else if( eType == wkbPolygon || eType == wkbMultiPolygon )
1277                             {
1278                                 poFeature->SetStyleString(CPLSPrintf("PEN(c:#%02X%02X%02X);BRUSH(fc:#%02X%02X%02X)",
1279                                                                     (int)(oGS.adfStrokeColor[0] * 255 + 0.5),
1280                                                                     (int)(oGS.adfStrokeColor[1] * 255 + 0.5),
1281                                                                     (int)(oGS.adfStrokeColor[2] * 255 + 0.5),
1282                                                                     (int)(oGS.adfFillColor[0] * 255 + 0.5),
1283                                                                     (int)(oGS.adfFillColor[1] * 255 + 0.5),
1284                                                                     (int)(oGS.adfFillColor[2] * 255 + 0.5)));
1285                             }
1286                         }
1287                         poGeom->assignSpatialReference(poCurLayer->GetSpatialRef());
1288                         poFeature->SetGeometryDirectly(poGeom);
1289                         CPL_IGNORE_RET_VAL(poCurLayer->CreateFeature(poFeature));
1290                         delete poFeature;
1291                     }
1292 
1293                     oCoords.resize(0);
1294                 }
1295             }
1296 
1297             szToken[0] = '\0';
1298             nTokenSize = 0;
1299         }
1300     }
1301 
1302     if (nTokenStackSize != 0)
1303     {
1304         while(nTokenStackSize != 0)
1305         {
1306             nTokenStackSize--;
1307             CPLDebug("PDF",
1308                      "Remaining values in stack : %s",
1309                      aszTokenStack[nTokenStackSize]);
1310         }
1311         return  nullptr;
1312     }
1313 
1314     if (bCollectAllObjects)
1315         return nullptr;
1316 
1317     return BuildGeometry(oCoords, bHasFoundFill, bHasMultiPart);
1318 }
1319 
1320 /************************************************************************/
1321 /*                           BuildGeometry()                            */
1322 /************************************************************************/
1323 
BuildGeometry(std::vector<double> & oCoords,int bHasFoundFill,int bHasMultiPart)1324 OGRGeometry* PDFDataset::BuildGeometry(std::vector<double>& oCoords,
1325                                              int bHasFoundFill,
1326                                              int bHasMultiPart)
1327 {
1328     OGRGeometry* poGeom = nullptr;
1329 
1330     if (!oCoords.size())
1331         return nullptr;
1332 
1333     if (oCoords.size() == 2)
1334     {
1335         double X, Y;
1336         PDFCoordsToSRSCoords(oCoords[0], oCoords[1], X, Y);
1337         poGeom = new OGRPoint(X, Y);
1338     }
1339     else if (!bHasFoundFill)
1340     {
1341         OGRLineString* poLS = nullptr;
1342         OGRMultiLineString* poMLS = nullptr;
1343         if (bHasMultiPart)
1344         {
1345             poMLS = new OGRMultiLineString();
1346             poGeom = poMLS;
1347         }
1348 
1349         for(size_t i=0;i<oCoords.size();i+=2)
1350         {
1351             if (oCoords[i] == NEW_SUBPATH && oCoords[i+1] == NEW_SUBPATH)
1352             {
1353                 if (poMLS)
1354                 {
1355                     poLS = new OGRLineString();
1356                     poMLS->addGeometryDirectly(poLS);
1357                 }
1358                 else
1359                 {
1360                     delete poLS;
1361                     poLS = new OGRLineString();
1362                     poGeom = poLS;
1363                 }
1364             }
1365             else if (oCoords[i] == CLOSE_SUBPATH && oCoords[i+1] == CLOSE_SUBPATH)
1366             {
1367                 if (poLS && poLS->getNumPoints() >= 2 &&
1368                     !(poLS->getX(0) == poLS->getX(poLS->getNumPoints()-1) &&
1369                         poLS->getY(0) == poLS->getY(poLS->getNumPoints()-1)))
1370                 {
1371                     poLS->addPoint(poLS->getX(0), poLS->getY(0));
1372                 }
1373             }
1374             else if (oCoords[i] == FILL_SUBPATH && oCoords[i+1] == FILL_SUBPATH)
1375             {
1376                 /* Should not happen */
1377             }
1378             else
1379             {
1380                 if (poLS)
1381                 {
1382                     double X, Y;
1383                     PDFCoordsToSRSCoords(oCoords[i], oCoords[i+1], X, Y);
1384 
1385                     poLS->addPoint(X, Y);
1386                 }
1387             }
1388         }
1389 
1390         // Recognize points as written by GDAL (ogr-sym-2 : circle (not filled))
1391         OGRGeometry* poCenter = nullptr;
1392         if (poCenter == nullptr && poLS != nullptr && poLS->getNumPoints() == 1 + BEZIER_STEPS * 4 )
1393         {
1394             poCenter = PDFGetCircleCenter(poLS);
1395         }
1396 
1397         // Recognize points as written by GDAL (ogr-sym-4: square (not filled))
1398         if (poCenter == nullptr && poLS != nullptr && (poLS->getNumPoints() == 4 || poLS->getNumPoints() == 5))
1399         {
1400             poCenter = PDFGetSquareCenter(poLS);
1401         }
1402 
1403         // Recognize points as written by GDAL (ogr-sym-6: triangle (not filled))
1404         if (poCenter == nullptr && poLS != nullptr && (poLS->getNumPoints() == 3 || poLS->getNumPoints() == 4))
1405         {
1406             poCenter = PDFGetTriangleCenter(poLS);
1407         }
1408 
1409         // Recognize points as written by GDAL (ogr-sym-8: star (not filled))
1410         if (poCenter == nullptr && poLS != nullptr && (poLS->getNumPoints() == 10 || poLS->getNumPoints() == 11))
1411         {
1412             poCenter = PDFGetStarCenter(poLS);
1413         }
1414 
1415         if (poCenter == nullptr && poMLS != nullptr && poMLS->getNumGeometries() == 2)
1416         {
1417             const OGRLineString* poLS1 = poMLS->getGeometryRef(0);
1418             const OGRLineString* poLS2 = poMLS->getGeometryRef(1);
1419 
1420             // Recognize points as written by GDAL (ogr-sym-0: cross (+) ).
1421             if (poLS1->getNumPoints() == 2 && poLS2->getNumPoints() == 2 &&
1422                 poLS1->getY(0) == poLS1->getY(1) &&
1423                 poLS2->getX(0) == poLS2->getX(1) &&
1424                 fabs(fabs(poLS1->getX(0) - poLS1->getX(1)) - fabs(poLS2->getY(0) - poLS2->getY(1))) < EPSILON &&
1425                 fabs((poLS1->getX(0) + poLS1->getX(1)) / 2 - poLS2->getX(0)) < EPSILON &&
1426                 fabs((poLS2->getY(0) + poLS2->getY(1)) / 2 - poLS1->getY(0)) < EPSILON)
1427             {
1428                 poCenter = new OGRPoint(poLS2->getX(0), poLS1->getY(0));
1429             }
1430             // Recognize points as written by GDAL (ogr-sym-1: diagcross (X) ).
1431             else if (poLS1->getNumPoints() == 2 && poLS2->getNumPoints() == 2 &&
1432                      poLS1->getX(0) == poLS2->getX(0) &&
1433                      poLS1->getY(0) == poLS2->getY(1) &&
1434                      poLS1->getX(1) == poLS2->getX(1) &&
1435                      poLS1->getY(1) == poLS2->getY(0) &&
1436                      fabs(fabs(poLS1->getX(0) - poLS1->getX(1)) - fabs(poLS1->getY(0) - poLS1->getY(1))) < EPSILON)
1437             {
1438                 poCenter = new OGRPoint((poLS1->getX(0) + poLS1->getX(1)) / 2,
1439                                         (poLS1->getY(0) + poLS1->getY(1)) / 2);
1440             }
1441         }
1442 
1443         if (poCenter)
1444         {
1445             delete poGeom;
1446             poGeom = poCenter;
1447         }
1448     }
1449     else
1450     {
1451         OGRLinearRing* poLS = nullptr;
1452         int nPolys = 0;
1453         OGRGeometry** papoPoly = nullptr;
1454 
1455         for(size_t i=0;i<oCoords.size();i+=2)
1456         {
1457             if (oCoords[i] == NEW_SUBPATH && oCoords[i+1] == NEW_SUBPATH)
1458             {
1459                 if (poLS && poLS->getNumPoints() >= 3)
1460                 {
1461                     OGRPolygon* poPoly =  new OGRPolygon();
1462                     poPoly->addRingDirectly(poLS);
1463                     poLS = nullptr;
1464 
1465                     papoPoly = (OGRGeometry**) CPLRealloc(papoPoly, (nPolys + 1) * sizeof(OGRGeometry*));
1466                     papoPoly[nPolys ++] = poPoly;
1467                 }
1468                 delete poLS;
1469                 poLS = new OGRLinearRing();
1470             }
1471             else if ((oCoords[i] == CLOSE_SUBPATH && oCoords[i+1] == CLOSE_SUBPATH) ||
1472                         (oCoords[i] == FILL_SUBPATH && oCoords[i+1] == FILL_SUBPATH))
1473             {
1474                 if (poLS)
1475                 {
1476                     poLS->closeRings();
1477 
1478                     std::unique_ptr<OGRPoint> poCenter;
1479 
1480                    if (nPolys == 0 &&
1481                         poLS &&
1482                         poLS->getNumPoints() == 1 + BEZIER_STEPS * 4 )
1483                    {
1484                         // Recognize points as written by GDAL (ogr-sym-3 : circle (filled))
1485                         poCenter.reset(PDFGetCircleCenter(poLS));
1486                    }
1487 
1488                     if (nPolys == 0 &&
1489                         poCenter == nullptr &&
1490                         poLS &&
1491                         poLS->getNumPoints() == 5)
1492                     {
1493                         // Recognize points as written by GDAL (ogr-sym-5: square (filled))
1494                         poCenter.reset(PDFGetSquareCenter(poLS));
1495 
1496                         /* ESRI points */
1497                         if (poCenter == nullptr &&
1498                             oCoords.size() == 14 &&
1499                             poLS->getY(0) == poLS->getY(1) &&
1500                             poLS->getX(1) == poLS->getX(2) &&
1501                             poLS->getY(2) == poLS->getY(3) &&
1502                             poLS->getX(3) == poLS->getX(0))
1503                         {
1504                             poCenter.reset(new OGRPoint((poLS->getX(0) + poLS->getX(1)) / 2,
1505                                                     (poLS->getY(0) + poLS->getY(2)) / 2));
1506                         }
1507                     }
1508                     // Recognize points as written by GDAL (ogr-sym-7: triangle (filled))
1509                     else if (nPolys == 0 &&
1510                              poLS &&
1511                              poLS->getNumPoints() == 4)
1512                     {
1513                         poCenter.reset(PDFGetTriangleCenter(poLS));
1514                     }
1515                     // Recognize points as written by GDAL (ogr-sym-9: star (filled))
1516                     else if (nPolys == 0 &&
1517                              poLS &&
1518                              poLS->getNumPoints() == 11)
1519                     {
1520                         poCenter.reset(PDFGetStarCenter(poLS));
1521                     }
1522 
1523                     if (poCenter)
1524                     {
1525                         delete poGeom;
1526                         poGeom = poCenter.release();
1527                         break;
1528                     }
1529 
1530                     if (poLS->getNumPoints() >= 3)
1531                     {
1532                         OGRPolygon* poPoly =  new OGRPolygon();
1533                         poPoly->addRingDirectly(poLS);
1534                         poLS = nullptr;
1535 
1536                         papoPoly = (OGRGeometry**) CPLRealloc(papoPoly, (nPolys + 1) * sizeof(OGRGeometry*));
1537                         papoPoly[nPolys ++] = poPoly;
1538                     }
1539                     else
1540                     {
1541                         delete poLS;
1542                         poLS = nullptr;
1543                     }
1544                 }
1545             }
1546             else
1547             {
1548                 if (poLS)
1549                 {
1550                     double X, Y;
1551                     PDFCoordsToSRSCoords(oCoords[i], oCoords[i+1], X, Y);
1552 
1553                     poLS->addPoint(X, Y);
1554                 }
1555             }
1556         }
1557 
1558         delete poLS;
1559 
1560         int bIsValidGeometry;
1561         if (nPolys == 2 &&
1562             papoPoly[0]->toPolygon()->getNumInteriorRings() == 0 &&
1563             papoPoly[1]->toPolygon()->getNumInteriorRings() == 0)
1564         {
1565             OGRLinearRing* poRing0 = papoPoly[0]->toPolygon()->getExteriorRing();
1566             OGRLinearRing* poRing1 = papoPoly[1]->toPolygon()->getExteriorRing();
1567             if (poRing0->getNumPoints() == poRing1->getNumPoints())
1568             {
1569                 int bSameRing = TRUE;
1570                 for(int i=0;i<poRing0->getNumPoints();i++)
1571                 {
1572                     if (poRing0->getX(i) != poRing1->getX(i))
1573                     {
1574                         bSameRing = FALSE;
1575                         break;
1576                     }
1577                     if (poRing0->getY(i) != poRing1->getY(i))
1578                     {
1579                         bSameRing = FALSE;
1580                         break;
1581                     }
1582                 }
1583 
1584                 /* Just keep on ring if they are identical */
1585                 if (bSameRing)
1586                 {
1587                     delete papoPoly[1];
1588                     nPolys = 1;
1589                 }
1590             }
1591         }
1592         if (nPolys)
1593         {
1594             poGeom = OGRGeometryFactory::organizePolygons(
1595                     papoPoly, nPolys, &bIsValidGeometry, nullptr);
1596         }
1597         CPLFree(papoPoly);
1598     }
1599 
1600     return poGeom;
1601 }
1602 
1603 /************************************************************************/
1604 /*                          ExploreContents()                           */
1605 /************************************************************************/
1606 
ExploreContents(GDALPDFObject * poObj,GDALPDFObject * poResources,int nDepth,int & nVisited,bool & bStop)1607 void PDFDataset::ExploreContents(GDALPDFObject* poObj,
1608                                  GDALPDFObject* poResources,
1609                                  int nDepth,
1610                                  int& nVisited,
1611                                  bool& bStop)
1612 {
1613     std::map<CPLString, OGRPDFLayer*> oMapPropertyToLayer;
1614     if( nDepth == 10 || nVisited == 1000 )
1615     {
1616         CPLError(CE_Failure, CPLE_AppDefined,
1617                  "ExploreContents(): too deep exploration or too many items");
1618         bStop = true;
1619         return;
1620     }
1621     if( bStop )
1622         return;
1623 
1624     if (poObj->GetType() == PDFObjectType_Array)
1625     {
1626         GDALPDFArray* poArray = poObj->GetArray();
1627         for(int i=0;i<poArray->GetLength();i++)
1628         {
1629             GDALPDFObject* poSubObj = poArray->Get(i);
1630             if( poSubObj )
1631             {
1632                 nVisited ++;
1633                 ExploreContents(poSubObj, poResources, nDepth + 1, nVisited, bStop);
1634                 if( bStop )
1635                     return;
1636             }
1637         }
1638     }
1639 
1640     if (poObj->GetType() != PDFObjectType_Dictionary)
1641         return;
1642 
1643     GDALPDFStream* poStream = poObj->GetStream();
1644     if (!poStream)
1645         return;
1646 
1647     char* pszStr = poStream->GetBytes();
1648     if (!pszStr)
1649         return;
1650 
1651     const char* pszMCID = (const char*) pszStr;
1652     while((pszMCID = strstr(pszMCID, "/MCID")) != nullptr)
1653     {
1654         const char* pszBDC = strstr(pszMCID, "BDC");
1655         if (pszBDC)
1656         {
1657             /* Hack for http://www.avenza.com/sites/default/files/spatialpdf/US_County_Populations.pdf */
1658             /* FIXME: that logic is too fragile. */
1659             const char* pszStartParsing = pszBDC;
1660             const char* pszAfterBDC = pszBDC + 3;
1661             int bMatchQ = FALSE;
1662             while (pszAfterBDC[0] == ' ' || pszAfterBDC[0] == '\r' || pszAfterBDC[0] == '\n')
1663                 pszAfterBDC ++;
1664             if (STARTS_WITH(pszAfterBDC, "0 0 m"))
1665             {
1666                 const char* pszLastq = pszBDC;
1667                 while(pszLastq > pszStr && *pszLastq != 'q')
1668                     pszLastq --;
1669 
1670                 if (pszLastq > pszStr && *pszLastq == 'q' &&
1671                     (pszLastq[-1] == ' ' || pszLastq[-1] == '\r' || pszLastq[-1] == '\n') &&
1672                     (pszLastq[1] == ' ' || pszLastq[1] == '\r' || pszLastq[1] == '\n'))
1673                 {
1674                     pszStartParsing = pszLastq;
1675                     bMatchQ = TRUE;
1676                 }
1677             }
1678 
1679             int nMCID = atoi(pszMCID + 6);
1680             if (GetGeometryFromMCID(nMCID) == nullptr)
1681             {
1682                 OGRGeometry* poGeom = ParseContent(pszStartParsing, poResources,
1683                                                    !bMatchQ, bMatchQ, oMapPropertyToLayer, nullptr);
1684                 if( poGeom != nullptr )
1685                 {
1686                     /* Save geometry in map */
1687                     oMapMCID[nMCID] = poGeom;
1688                 }
1689             }
1690         }
1691         pszMCID += 5;
1692     }
1693     CPLFree(pszStr);
1694 }
1695 
1696 /************************************************************************/
/*                ExploreContentsNonStructuredInternal()                */
1698 /************************************************************************/
1699 
ExploreContentsNonStructuredInternal(GDALPDFObject * poContents,GDALPDFObject * poResources,std::map<CPLString,OGRPDFLayer * > & oMapPropertyToLayer,OGRPDFLayer * poSingleLayer)1700 void PDFDataset::ExploreContentsNonStructuredInternal(GDALPDFObject* poContents,
1701                                                       GDALPDFObject* poResources,
1702                                                       std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer,
1703                                                       OGRPDFLayer* poSingleLayer)
1704 {
1705     if (poContents->GetType() == PDFObjectType_Array)
1706     {
1707         GDALPDFArray* poArray = poContents->GetArray();
1708         char* pszConcatStr = nullptr;
1709         int nConcatLen = 0;
1710         for(int i=0;i<poArray->GetLength();i++)
1711         {
1712             GDALPDFObject* poObj = poArray->Get(i);
1713             if( poObj == nullptr || poObj->GetType() != PDFObjectType_Dictionary)
1714                 break;
1715             GDALPDFStream* poStream = poObj->GetStream();
1716             if (!poStream)
1717                 break;
1718             char* pszStr = poStream->GetBytes();
1719             if (!pszStr)
1720                 break;
1721             int nLen = (int)strlen(pszStr);
1722             char* pszConcatStrNew = (char*)CPLRealloc(pszConcatStr, nConcatLen + nLen + 1);
1723             if( pszConcatStrNew == nullptr )
1724             {
1725                 CPLFree(pszStr);
1726                 break;
1727             }
1728             pszConcatStr = pszConcatStrNew;
1729             memcpy(pszConcatStr + nConcatLen, pszStr, nLen+1);
1730             nConcatLen += nLen;
1731             CPLFree(pszStr);
1732         }
1733         if( pszConcatStr )
1734             ParseContent(pszConcatStr, poResources, FALSE, FALSE, oMapPropertyToLayer, poSingleLayer);
1735         CPLFree(pszConcatStr);
1736         return;
1737     }
1738 
1739     if (poContents->GetType() != PDFObjectType_Dictionary)
1740         return;
1741 
1742     GDALPDFStream* poStream = poContents->GetStream();
1743     if (!poStream)
1744         return;
1745 
1746     char* pszStr = poStream->GetBytes();
1747     if( !pszStr )
1748         return;
1749     ParseContent(pszStr, poResources, FALSE, FALSE, oMapPropertyToLayer, poSingleLayer);
1750     CPLFree(pszStr);
1751 }
1752 
ExploreContentsNonStructured(GDALPDFObject * poContents,GDALPDFObject * poResources)1753 void PDFDataset::ExploreContentsNonStructured(GDALPDFObject* poContents,
1754                                                     GDALPDFObject* poResources)
1755 {
1756     std::map<CPLString, OGRPDFLayer*> oMapPropertyToLayer;
1757     if (poResources != nullptr &&
1758         poResources->GetType() == PDFObjectType_Dictionary)
1759     {
1760         GDALPDFObject* poProperties =
1761             poResources->GetDictionary()->Get("Properties");
1762         if (poProperties != nullptr &&
1763             poProperties->GetType() == PDFObjectType_Dictionary)
1764         {
1765             std::map< std::pair<int, int>, OGRPDFLayer *> oMapNumGenToLayer;
1766             for(const auto& oLayerWithref: aoLayerWithRef )
1767             {
1768                 CPLString osSanitizedName(PDFSanitizeLayerName(oLayerWithref.osName));
1769 
1770                 OGRPDFLayer* poLayer = (OGRPDFLayer*) GetLayerByName(osSanitizedName.c_str());
1771                 if (poLayer == nullptr)
1772                 {
1773                     auto poSRSOri = GetSpatialRef();
1774                     OGRSpatialReference* poSRS = poSRSOri ? poSRSOri->Clone() : nullptr;
1775                     poLayer =
1776                         new OGRPDFLayer(this, osSanitizedName.c_str(), poSRS, wkbUnknown);
1777                     if( poSRS )
1778                         poSRS->Release();
1779 
1780                     papoLayers = (OGRLayer**)
1781                         CPLRealloc(papoLayers, (nLayers + 1) * sizeof(OGRLayer*));
1782                     papoLayers[nLayers] = poLayer;
1783                     nLayers ++;
1784                 }
1785 
1786                 oMapNumGenToLayer[ std::pair<int,int>(oLayerWithref.nOCGNum.toInt(), oLayerWithref.nOCGGen) ] = poLayer;
1787             }
1788 
1789             std::map<CPLString, GDALPDFObject*>& oMap =
1790                                     poProperties->GetDictionary()->GetValues();
1791             std::map<CPLString, GDALPDFObject*>::iterator oIter = oMap.begin();
1792             std::map<CPLString, GDALPDFObject*>::iterator oEnd = oMap.end();
1793 
1794             for(; oIter != oEnd; ++oIter)
1795             {
1796                 const char* pszKey = oIter->first.c_str();
1797                 GDALPDFObject* poObj = oIter->second;
1798                 if( poObj->GetRefNum().toBool() )
1799                 {
1800                     std::map< std::pair<int, int>, OGRPDFLayer *>::iterator
1801                         oIterNumGenToLayer = oMapNumGenToLayer.find(
1802                             std::pair<int,int>(poObj->GetRefNum().toInt(), poObj->GetRefGen()) );
1803                     if( oIterNumGenToLayer != oMapNumGenToLayer.end() )
1804                     {
1805                         oMapPropertyToLayer[pszKey] = oIterNumGenToLayer->second;
1806                     }
1807                 }
1808             }
1809         }
1810     }
1811 
1812     OGRPDFLayer* poSingleLayer = nullptr;
1813     if( nLayers == 0 )
1814     {
1815         if( CPLTestBool(CPLGetConfigOption("OGR_PDF_READ_NON_STRUCTURED", "NO")) )
1816         {
1817             OGRPDFLayer *poLayer =
1818                 new OGRPDFLayer(this, "content", nullptr, wkbUnknown);
1819             papoLayers = (OGRLayer**)
1820                 CPLRealloc(papoLayers, (nLayers + 1) * sizeof(OGRLayer*));
1821             papoLayers[nLayers] = poLayer;
1822             nLayers ++;
1823             poSingleLayer = poLayer;
1824         }
1825         else
1826         {
1827             return;
1828         }
1829     }
1830 
1831     ExploreContentsNonStructuredInternal(poContents,
1832                                          poResources,
1833                                          oMapPropertyToLayer,
1834                                          poSingleLayer);
1835 
1836     /* Remove empty layers */
1837     int i = 0;
1838     while(i < nLayers)
1839     {
1840         if (papoLayers[i]->GetFeatureCount() == 0)
1841         {
1842             delete papoLayers[i];
1843             if (i < nLayers - 1)
1844             {
1845                 memmove(papoLayers + i, papoLayers + i + 1,
1846                         (nLayers - 1 - i) * sizeof(OGRPDFLayer*));
1847             }
1848             nLayers --;
1849         }
1850         else
1851             i ++;
1852     }
1853 }
1854 
1855 #endif /* HAVE_PDF_READ_SUPPORT */
1856