1 /******************************************************************************
2 *
3 * Project: PDF driver
4 * Purpose: GDALDataset driver for PDF dataset (read vector features)
5 * Author: Even Rouault, <even dot rouault at spatialys.com>
6 *
7 ******************************************************************************
8 * Copyright (c) 2010-2014, Even Rouault <even dot rouault at spatialys.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a
11 * copy of this software and associated documentation files (the "Software"),
12 * to deal in the Software without restriction, including without limitation
13 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 * and/or sell copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included
18 * in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 ****************************************************************************/
28
29 #include "gdal_pdf.h"
30
31 #include <array>
32
33 #define SQUARE(x) ((x)*(x))
34 #define EPSILON 1e-5
35
36 CPL_CVSID("$Id: pdfreadvectors.cpp 6c0684aa03b8700ef7ff6329ba2bce592d453c9f 2021-08-11 14:42:51 +0200 Even Rouault $")
37
38 #ifdef HAVE_PDF_READ_SUPPORT
39
40 constexpr int BEZIER_STEPS = 10;
41
42 /************************************************************************/
43 /* OpenVectorLayers() */
44 /************************************************************************/
45
OpenVectorLayers(GDALPDFDictionary * poPageDict)46 int PDFDataset::OpenVectorLayers(GDALPDFDictionary* poPageDict)
47 {
48 if( bHasLoadedLayers )
49 return TRUE;
50 bHasLoadedLayers = TRUE;
51
52 if( poPageDict == nullptr )
53 {
54 poPageDict = poPageObj->GetDictionary();
55 if ( poPageDict == nullptr )
56 return FALSE;
57 }
58
59 GetCatalog();
60 if( poCatalogObject == nullptr || poCatalogObject->GetType() != PDFObjectType_Dictionary )
61 return FALSE;
62
63 GDALPDFObject* poContents = poPageDict->Get("Contents");
64 if (poContents == nullptr)
65 return FALSE;
66
67 if (poContents->GetType() != PDFObjectType_Dictionary &&
68 poContents->GetType() != PDFObjectType_Array)
69 return FALSE;
70
71 GDALPDFObject* poResources = poPageDict->Get("Resources");
72 if (poResources == nullptr || poResources->GetType() != PDFObjectType_Dictionary)
73 return FALSE;
74
75 GDALPDFObject* poStructTreeRoot = poCatalogObject->GetDictionary()->Get("StructTreeRoot");
76 if (CPLTestBool(CPLGetConfigOption("OGR_PDF_READ_NON_STRUCTURED", "NO")) ||
77 poStructTreeRoot == nullptr ||
78 poStructTreeRoot->GetType() != PDFObjectType_Dictionary)
79 {
80 ExploreContentsNonStructured(poContents, poResources);
81 }
82 else
83 {
84 int nDepth = 0;
85 int nVisited = 0;
86 bool bStop = false;
87 ExploreContents(poContents, poResources, nDepth, nVisited, bStop);
88 std::set< std::pair<int,int> > aoSetAlreadyVisited;
89 ExploreTree(poStructTreeRoot, aoSetAlreadyVisited, 0);
90 }
91
92 CleanupIntermediateResources();
93
94 int bEmptyDS = TRUE;
95 for(int i=0;i<nLayers;i++)
96 {
97 if (papoLayers[i]->GetFeatureCount() != 0)
98 {
99 bEmptyDS = FALSE;
100 break;
101 }
102 }
103 return !bEmptyDS;
104 }
105
106 /************************************************************************/
107 /* CleanupIntermediateResources() */
108 /************************************************************************/
109
CleanupIntermediateResources()110 void PDFDataset::CleanupIntermediateResources()
111 {
112 std::map<int,OGRGeometry*>::iterator oMapIter = oMapMCID.begin();
113 for( ; oMapIter != oMapMCID.end(); ++oMapIter)
114 delete oMapIter->second;
115 oMapMCID.erase(oMapMCID.begin(), oMapMCID.end());
116 }
117
118 /************************************************************************/
119 /* InitMapOperators() */
120 /************************************************************************/
121
/* One entry of the content-stream operator table. */
struct PDFOperator
{
    char szOpName[4];   /* operator name, NUL-terminated (at most 3 chars) */
    int  nArgs;         /* operand count registered for the operator;
                           -1 presumably means "variable" - TODO confirm */
};

/* Operators registered by InitMapOperators(). Operators that appear only as
 * comments below are intentionally not part of the table. */
static const PDFOperator asPDFOperators [] =
{
    { "b",   0 },
    { "B",   0 },
    { "b*",  0 },
    { "B*",  0 },
    { "BDC", 2 },
    // BI
    { "BMC", 1 },
    // BT
    { "BX",  0 },
    { "c",   6 },
    { "cm",  6 },
    { "CS",  1 },
    { "cs",  1 },
    { "d",   1 }, /* we have ignored the first arg */
    // d0
    // d1
    { "Do",  1 },
    { "DP",  2 },
    // EI
    { "EMC", 0 },
    // ET
    { "EX",  0 },
    { "f",   0 },
    { "F",   0 },
    { "f*",  0 },
    { "G",   1 },
    { "g",   1 },
    { "gs",  1 },
    { "h",   0 },
    { "i",   1 },
    // ID
    { "j",   1 },
    { "J",   1 },
    { "K",   4 },
    { "k",   4 },
    { "l",   2 },
    { "m",   2 },
    { "M",   1 },
    { "MP",  1 },
    { "n",   0 },
    { "q",   0 },
    { "Q",   0 },
    { "re",  4 },
    { "RG",  3 },
    { "rg",  3 },
    { "ri",  1 },
    { "s",   0 },
    { "S",   0 },
    { "SC",  -1 },
    { "sc",  -1 },
    { "SCN", -1 },
    { "scn", -1 },
    { "sh",  1 },
    // T*
    { "Tc",  1 },
    { "Td",  2 },
    { "TD",  2 },
    { "Tf",  1 },
    { "Tj",  1 },
    { "TJ",  1 },
    { "TL",  1 },
    { "Tm",  6 },
    { "Tr",  1 },
    { "Ts",  1 },
    { "Tw",  1 },
    { "Tz",  1 },
    { "v",   4 },
    { "w",   1 },
    { "W",   0 },
    { "W*",  0 },
    { "y",   4 },
    // '
    // "
};
204
InitMapOperators()205 void PDFDataset::InitMapOperators()
206 {
207 for(size_t i=0;i<sizeof(asPDFOperators) / sizeof(asPDFOperators[0]); i++)
208 oMapOperators[asPDFOperators[i].szOpName] = asPDFOperators[i].nArgs;
209 }
210
211 /************************************************************************/
212 /* TestCapability() */
213 /************************************************************************/
214
TestCapability(CPL_UNUSED const char * pszCap)215 int PDFDataset::TestCapability( CPL_UNUSED const char * pszCap )
216 {
217 return FALSE;
218 }
219
220 /************************************************************************/
221 /* GetLayer() */
222 /************************************************************************/
223
GetLayer(int iLayer)224 OGRLayer *PDFDataset::GetLayer( int iLayer )
225
226 {
227 OpenVectorLayers(nullptr);
228 if (iLayer < 0 || iLayer >= nLayers)
229 return nullptr;
230
231 return papoLayers[iLayer];
232 }
233
234 /************************************************************************/
235 /* GetLayerCount() */
236 /************************************************************************/
237
GetLayerCount()238 int PDFDataset::GetLayerCount()
239 {
240 OpenVectorLayers(nullptr);
241 return nLayers;
242 }
243
244 /************************************************************************/
245 /* ExploreTree() */
246 /************************************************************************/
247
ExploreTree(GDALPDFObject * poObj,std::set<std::pair<int,int>> aoSetAlreadyVisited,int nRecLevel)248 void PDFDataset::ExploreTree(GDALPDFObject* poObj,
249 std::set< std::pair<int,int> > aoSetAlreadyVisited,
250 int nRecLevel)
251 {
252 if (nRecLevel == 16)
253 return;
254
255 std::pair<int,int> oObjPair( poObj->GetRefNum().toInt(), poObj->GetRefGen() );
256 if( aoSetAlreadyVisited.find( oObjPair ) != aoSetAlreadyVisited.end() )
257 return;
258 aoSetAlreadyVisited.insert( oObjPair );
259
260 if (poObj->GetType() != PDFObjectType_Dictionary)
261 return;
262
263 GDALPDFDictionary* poDict = poObj->GetDictionary();
264
265 GDALPDFObject* poS = poDict->Get("S");
266 CPLString osS;
267 if (poS != nullptr && poS->GetType() == PDFObjectType_Name)
268 {
269 osS = poS->GetName();
270 }
271
272 GDALPDFObject* poT = poDict->Get("T");
273 CPLString osT;
274 if (poT != nullptr && poT->GetType() == PDFObjectType_String)
275 {
276 osT = poT->GetString();
277 }
278
279 GDALPDFObject* poK = poDict->Get("K");
280 if (poK == nullptr)
281 return;
282
283 if (poK->GetType() == PDFObjectType_Array)
284 {
285 GDALPDFArray* poArray = poK->GetArray();
286 if (poArray->GetLength() > 0 &&
287 poArray->Get(0) &&
288 poArray->Get(0)->GetType() == PDFObjectType_Dictionary &&
289 poArray->Get(0)->GetDictionary()->Get("K") != nullptr &&
290 poArray->Get(0)->GetDictionary()->Get("K")->GetType() == PDFObjectType_Int)
291 {
292 CPLString osLayerName;
293 if (!osT.empty() )
294 osLayerName = osT;
295 else
296 {
297 if (!osS.empty() )
298 osLayerName = osS;
299 else
300 osLayerName = CPLSPrintf("Layer%d", nLayers + 1);
301 }
302
303 auto poSRSOri = GetSpatialRef();
304 OGRSpatialReference* poSRS = poSRSOri ? poSRSOri->Clone() : nullptr;
305 OGRPDFLayer* poLayer =
306 new OGRPDFLayer(this, osLayerName.c_str(), poSRS, wkbUnknown);
307 if( poSRS )
308 poSRS->Release();
309
310 poLayer->Fill(poArray);
311
312 papoLayers = (OGRLayer**)
313 CPLRealloc(papoLayers, (nLayers + 1) * sizeof(OGRLayer*));
314 papoLayers[nLayers] = poLayer;
315 nLayers ++;
316 }
317 else
318 {
319 for(int i=0;i<poArray->GetLength();i++)
320 {
321 auto poSubObj = poArray->Get(i);
322 if (poSubObj )
323 {
324 ExploreTree(poSubObj, aoSetAlreadyVisited,
325 nRecLevel + 1);
326 }
327 }
328 }
329 }
330 else if (poK->GetType() == PDFObjectType_Dictionary)
331 {
332 ExploreTree(poK, aoSetAlreadyVisited, nRecLevel + 1);
333 }
334 }
335
336 /************************************************************************/
337 /* GetGeometryFromMCID() */
338 /************************************************************************/
339
GetGeometryFromMCID(int nMCID)340 OGRGeometry* PDFDataset::GetGeometryFromMCID(int nMCID)
341 {
342 std::map<int,OGRGeometry*>::iterator oMapIter = oMapMCID.find(nMCID);
343 if (oMapIter != oMapMCID.end())
344 return oMapIter->second;
345 else
346 return nullptr;
347 }
348
349 /************************************************************************/
350 /* GraphicState */
351 /************************************************************************/
352
/**
 * Subset of the PDF graphics state tracked while parsing a content stream:
 * the current transformation matrix and the stroke / fill colors.
 *
 * The matrix is stored as the six values (a, b, c, d, e, f) of
 *      [ a b 0 ]
 *      [ c d 0 ]
 *      [ e f 1 ]
 * and is applied to row vectors: [x' y' 1] = [x y 1] * CM.
 */
class GraphicState
{
    public:
        std::array<double,6> adfCM;          /* current transformation matrix */
        std::array<double,3> adfStrokeColor; /* 3 components, presumably RGB */
        std::array<double,3> adfFillColor;   /* 3 components, presumably RGB */

        /* Identity matrix, stroke color (0,0,0), fill color (1,1,1). */
        GraphicState():
            adfCM{{1.0, 0.0, 0.0, 1.0, 0.0, 0.0}},
            adfStrokeColor{{0.0, 0.0, 0.0}},
            adfFillColor{{1.0, 1.0, 1.0}}
        {
        }

        /**
         * Concatenate adfMatrix (the operands of a "cm" operator) to the
         * current transformation matrix.
         *
         * Be careful about the multiplication order: per the PDF reference
         * (ISO 32000-1, 8.3.4 "Transformation Matrices"), the combined
         * matrix M' is obtained by PRE-multiplying the existing matrix M by
         * the additional transformation MT, i.e. M' = MT x M. Computing
         * M x MT instead gives wrong coordinates as soon as two
         * non-commuting "cm" operators are chained (e.g. a translation
         * followed by a scale).
         *
         *   [ a b 0 ]   [ a' b' 0]   [ aa' + bc'       ab' + bd'       0 ]
         *   [ c d 0 ] * [ c' d' 0] = [ ca' + dc'       cb' + dd'       0 ]
         *   [ e f 1 ]   [ e' f' 1]   [ ea' + fc' + e'  eb' + fd' + f'  1 ]
         *
         * with (a..f) = adfMatrix and (a'..f') = the current matrix.
         */
        void MultiplyBy(double adfMatrix[6])
        {
            const double a = adfMatrix[0];
            const double b = adfMatrix[1];
            const double c = adfMatrix[2];
            const double d = adfMatrix[3];
            const double e = adfMatrix[4];
            const double f = adfMatrix[5];
            const double ap = adfCM[0];
            const double bp = adfCM[1];
            const double cp = adfCM[2];
            const double dp = adfCM[3];
            const double ep = adfCM[4];
            const double fp = adfCM[5];
            adfCM[0] = a*ap + b*cp;
            adfCM[1] = a*bp + b*dp;
            adfCM[2] = c*ap + d*cp;
            adfCM[3] = c*bp + d*dp;
            adfCM[4] = e*ap + f*cp + ep;
            adfCM[5] = e*bp + f*dp + fp;
        }

        /**
         * Transform in place the point (adfCoords[0], adfCoords[1]) with the
         * current transformation matrix.
         */
        void ApplyMatrix(double adfCoords[2]) const
        {
            const double x = adfCoords[0];
            const double y = adfCoords[1];

            adfCoords[0] = x * adfCM[0] + y * adfCM[2] + adfCM[4];
            adfCoords[1] = x * adfCM[1] + y * adfCM[3] + adfCM[5];
        }
};
413
414 /************************************************************************/
415 /* PDFCoordsToSRSCoords() */
416 /************************************************************************/
417
PDFCoordsToSRSCoords(double x,double y,double & X,double & Y)418 void PDFDataset::PDFCoordsToSRSCoords(double x, double y,
419 double& X, double &Y)
420 {
421 x = x / dfPageWidth * nRasterXSize;
422 if( bGeoTransformValid )
423 y = (1 - y / dfPageHeight) * nRasterYSize;
424 else
425 y = (y / dfPageHeight) * nRasterYSize;
426
427 X = adfGeoTransform[0] + x * adfGeoTransform[1] + y * adfGeoTransform[2];
428 Y = adfGeoTransform[3] + x * adfGeoTransform[4] + y * adfGeoTransform[5];
429
430 if( fabs(X - (int)floor(X + 0.5)) < 1e-8 )
431 X = (int)floor(X + 0.5);
432 if( fabs(Y - (int)floor(Y + 0.5)) < 1e-8 )
433 Y = (int)floor(Y + 0.5);
434 }
435
436 /************************************************************************/
437 /* PDFGetCircleCenter() */
438 /************************************************************************/
439
440 /* Return the center of a circle, or NULL if it is not recognized */
441
PDFGetCircleCenter(OGRLineString * poLS)442 static OGRPoint* PDFGetCircleCenter(OGRLineString* poLS)
443 {
444 if (poLS == nullptr || poLS->getNumPoints() != 1 + 4 * BEZIER_STEPS)
445 return nullptr;
446
447 if (poLS->getY(0 * BEZIER_STEPS) == poLS->getY(2 * BEZIER_STEPS) &&
448 poLS->getX(1 * BEZIER_STEPS) == poLS->getX(3 * BEZIER_STEPS) &&
449 fabs((poLS->getX(0 * BEZIER_STEPS) + poLS->getX(2 * BEZIER_STEPS)) / 2 - poLS->getX(1 * BEZIER_STEPS)) < EPSILON &&
450 fabs((poLS->getY(1 * BEZIER_STEPS) + poLS->getY(3 * BEZIER_STEPS)) / 2 - poLS->getY(0 * BEZIER_STEPS)) < EPSILON)
451 {
452 return new OGRPoint((poLS->getX(0 * BEZIER_STEPS) + poLS->getX(2 * BEZIER_STEPS)) / 2,
453 (poLS->getY(1 * BEZIER_STEPS) + poLS->getY(3 * BEZIER_STEPS)) / 2);
454 }
455 return nullptr;
456 }
457
458 /************************************************************************/
459 /* PDFGetSquareCenter() */
460 /************************************************************************/
461
462 /* Return the center of a square, or NULL if it is not recognized */
463
PDFGetSquareCenter(OGRLineString * poLS)464 static OGRPoint* PDFGetSquareCenter(OGRLineString* poLS)
465 {
466 if (poLS == nullptr || poLS->getNumPoints() < 4 || poLS->getNumPoints() > 5)
467 return nullptr;
468
469 if (poLS->getX(0) == poLS->getX(3) &&
470 poLS->getY(0) == poLS->getY(1) &&
471 poLS->getX(1) == poLS->getX(2) &&
472 poLS->getY(2) == poLS->getY(3) &&
473 fabs(fabs(poLS->getX(0) - poLS->getX(1)) - fabs(poLS->getY(0) - poLS->getY(3))) < EPSILON)
474 {
475 return new OGRPoint((poLS->getX(0) + poLS->getX(1)) / 2,
476 (poLS->getY(0) + poLS->getY(3)) / 2);
477 }
478 return nullptr;
479 }
480
481 /************************************************************************/
482 /* PDFGetTriangleCenter() */
483 /************************************************************************/
484
/* Return the center of an equilateral triangle, or NULL if it is not recognized */
486
PDFGetTriangleCenter(OGRLineString * poLS)487 static OGRPoint* PDFGetTriangleCenter(OGRLineString* poLS)
488 {
489 if (poLS == nullptr || poLS->getNumPoints() < 3 || poLS->getNumPoints() > 4)
490 return nullptr;
491
492 double dfSqD1 = SQUARE(poLS->getX(0) - poLS->getX(1)) + SQUARE(poLS->getY(0) - poLS->getY(1));
493 double dfSqD2 = SQUARE(poLS->getX(1) - poLS->getX(2)) + SQUARE(poLS->getY(1) - poLS->getY(2));
494 double dfSqD3 = SQUARE(poLS->getX(0) - poLS->getX(2)) + SQUARE(poLS->getY(0) - poLS->getY(2));
495 if (fabs(dfSqD1 - dfSqD2) < EPSILON && fabs(dfSqD2 - dfSqD3) < EPSILON)
496 {
497 return new OGRPoint((poLS->getX(0) + poLS->getX(1) + poLS->getX(2)) / 3,
498 (poLS->getY(0) + poLS->getY(1) + poLS->getY(2)) / 3);
499 }
500 return nullptr;
501 }
502
503 /************************************************************************/
504 /* PDFGetStarCenter() */
505 /************************************************************************/
506
507 /* Return the center of a 5-point star, or NULL if it is not recognized */
508
PDFGetStarCenter(OGRLineString * poLS)509 static OGRPoint* PDFGetStarCenter(OGRLineString* poLS)
510 {
511 if (poLS == nullptr || poLS->getNumPoints() < 10 || poLS->getNumPoints() > 11)
512 return nullptr;
513
514 double dfSqD01 = SQUARE(poLS->getX(0) - poLS->getX(1)) +
515 SQUARE(poLS->getY(0) - poLS->getY(1));
516 double dfSqD02 = SQUARE(poLS->getX(0) - poLS->getX(2)) +
517 SQUARE(poLS->getY(0) - poLS->getY(2));
518 double dfSqD13 = SQUARE(poLS->getX(1) - poLS->getX(3)) +
519 SQUARE(poLS->getY(1) - poLS->getY(3));
520 const double dfSin18divSin126 = 0.38196601125;
521 if( dfSqD02 == 0 )
522 return nullptr;
523 int bOK = fabs(dfSqD13 / dfSqD02 - SQUARE(dfSin18divSin126)) < EPSILON;
524 for(int i=1;i<10 && bOK;i++)
525 {
526 double dfSqDiip1 = SQUARE(poLS->getX(i) - poLS->getX((i+1)%10)) +
527 SQUARE(poLS->getY(i) - poLS->getY((i+1)%10));
528 if (fabs(dfSqDiip1 - dfSqD01) > EPSILON)
529 {
530 bOK = FALSE;
531 }
532 double dfSqDiip2 = SQUARE(poLS->getX(i) - poLS->getX((i+2)%10)) +
533 SQUARE(poLS->getY(i) - poLS->getY((i+2)%10));
534 if ( (i%2) == 1 && fabs(dfSqDiip2 - dfSqD13) > EPSILON )
535 {
536 bOK = FALSE;
537 }
538 if ( (i%2) == 0 && fabs(dfSqDiip2 - dfSqD02) > EPSILON )
539 {
540 bOK = FALSE;
541 }
542 }
543 if (bOK)
544 {
545 return new OGRPoint((poLS->getX(0) + poLS->getX(2) + poLS->getX(4) +
546 poLS->getX(6) + poLS->getX(8)) / 5,
547 (poLS->getY(0) + poLS->getY(2) + poLS->getY(4) +
548 poLS->getY(6) + poLS->getY(8)) / 5);
549 }
550 return nullptr;
551 }
552
553 /************************************************************************/
554 /* UnstackTokens() */
555 /************************************************************************/
556
UnstackTokens(const char * pszToken,int nRequiredArgs,char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE],int & nTokenStackSize,double * adfCoords)557 int PDFDataset::UnstackTokens(const char* pszToken,
558 int nRequiredArgs,
559 char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE],
560 int& nTokenStackSize,
561 double* adfCoords)
562 {
563 if (nTokenStackSize < nRequiredArgs)
564 {
565 CPLDebug("PDF", "not enough arguments for %s", pszToken);
566 return FALSE;
567 }
568 nTokenStackSize -= nRequiredArgs;
569 for(int i=0;i<nRequiredArgs;i++)
570 {
571 adfCoords[i] = CPLAtof(aszTokenStack[nTokenStackSize+i]);
572 }
573 return TRUE;
574 }
575
576 /************************************************************************/
577 /* AddBezierCurve() */
578 /************************************************************************/
579
AddBezierCurve(std::vector<double> & oCoords,const double * x0_y0,const double * x1_y1,const double * x2_y2,const double * x3_y3)580 static void AddBezierCurve(std::vector<double>& oCoords,
581 const double* x0_y0,
582 const double* x1_y1,
583 const double* x2_y2,
584 const double* x3_y3)
585 {
586 double x0 = x0_y0[0];
587 double y0 = x0_y0[1];
588 double x1 = x1_y1[0];
589 double y1 = x1_y1[1];
590 double x2 = x2_y2[0];
591 double y2 = x2_y2[1];
592 double x3 = x3_y3[0];
593 double y3 = x3_y3[1];
594 for( int i = 1; i < BEZIER_STEPS; i++ )
595 {
596 const double t = static_cast<double>(i) / BEZIER_STEPS;
597 const double t2 = t * t;
598 const double t3 = t2 * t;
599 const double oneMinust = 1 - t;
600 const double oneMinust2 = oneMinust * oneMinust;
601 const double oneMinust3 = oneMinust2 * oneMinust;
602 const double three_t_oneMinust = 3 * t * oneMinust;
603 const double x = oneMinust3 * x0 + three_t_oneMinust * (oneMinust * x1 + t * x2) + t3 * x3;
604 const double y = oneMinust3 * y0 + three_t_oneMinust * (oneMinust * y1 + t * y2) + t3 * y3;
605 oCoords.push_back(x);
606 oCoords.push_back(y);
607 }
608 oCoords.push_back(x3);
609 oCoords.push_back(y3);
610 }
611
612 /************************************************************************/
613 /* ParseContent() */
614 /************************************************************************/
615
616 #define NEW_SUBPATH -99
617 #define CLOSE_SUBPATH -98
618 #define FILL_SUBPATH -97
619
ParseContent(const char * pszContent,GDALPDFObject * poResources,int bInitBDCStack,int bMatchQ,std::map<CPLString,OGRPDFLayer * > & oMapPropertyToLayer,OGRPDFLayer * poCurLayer)620 OGRGeometry* PDFDataset::ParseContent(const char* pszContent,
621 GDALPDFObject* poResources,
622 int bInitBDCStack,
623 int bMatchQ,
624 std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer,
625 OGRPDFLayer* poCurLayer)
626 {
627
628 #define PUSH(aszTokenStack, str, strlen) \
629 do \
630 { \
631 if(nTokenStackSize < TOKEN_STACK_SIZE) \
632 memcpy(aszTokenStack[nTokenStackSize ++], str, strlen + 1); \
633 else \
634 { \
635 CPLError(CE_Failure, CPLE_AppDefined, \
636 "Max token stack size reached"); \
637 return nullptr; \
638 }; \
639 } while( false )
640
641 #define ADD_CHAR(szToken, c) \
642 do \
643 { \
644 if(nTokenSize < MAX_TOKEN_SIZE-1) \
645 { \
646 szToken[nTokenSize ++ ] = c; \
647 szToken[nTokenSize ] = '\0'; \
648 } \
649 else \
650 { \
651 CPLError(CE_Failure, CPLE_AppDefined, "Max token size reached");\
652 return nullptr; \
653 }; \
654 } while( false )
655
656 char szToken[MAX_TOKEN_SIZE];
657 int nTokenSize = 0;
658 char ch;
659 char aszTokenStack[TOKEN_STACK_SIZE][MAX_TOKEN_SIZE];
660 int nTokenStackSize = 0;
661 int bInString = FALSE;
662 int nBDCLevel = 0;
663 int nParenthesisLevel = 0;
664 int nArrayLevel = 0;
665 int nBTLevel = 0;
666
667 int bCollectAllObjects = poResources != nullptr && !bInitBDCStack && !bMatchQ;
668
669 GraphicState oGS;
670 std::stack<GraphicState> oGSStack;
671 std::stack<OGRPDFLayer*> oLayerStack;
672
673 std::vector<double> oCoords;
674 int bHasFoundFill = FALSE;
675 int bHasMultiPart = FALSE;
676
677 szToken[0] = '\0';
678
679 if (bInitBDCStack)
680 {
681 PUSH(aszTokenStack, "dummy", 5);
682 PUSH(aszTokenStack, "dummy", 5);
683 oLayerStack.push(nullptr);
684 }
685
686 while((ch = *pszContent) != '\0')
687 {
688 int bPushToken = FALSE;
689
690 if (!bInString && ch == '%')
691 {
692 /* Skip comments until end-of-line */
693 while((ch = *pszContent) != '\0')
694 {
695 if (ch == '\r' || ch == '\n')
696 break;
697 pszContent ++;
698 }
699 if (ch == 0)
700 break;
701 }
702 else if (!bInString && (ch == ' ' || ch == '\r' || ch == '\n'))
703 {
704 bPushToken = TRUE;
705 }
706
707 /* Ignore arrays */
708 else if (!bInString && nTokenSize == 0 && ch == '[')
709 {
710 nArrayLevel ++;
711 }
712 else if (!bInString && nArrayLevel && ch == ']')
713 {
714 nArrayLevel --;
715 }
716
717 else if (!bInString && nTokenSize == 0 && ch == '(')
718 {
719 bInString = TRUE;
720 nParenthesisLevel ++;
721 ADD_CHAR(szToken, ch);
722 }
723 else if (bInString && ch == '(')
724 {
725 nParenthesisLevel ++;
726 ADD_CHAR(szToken, ch);
727 }
728 else if (bInString && ch == ')')
729 {
730 nParenthesisLevel --;
731 ADD_CHAR(szToken, ch);
732 if (nParenthesisLevel == 0)
733 {
734 bInString = FALSE;
735 bPushToken = TRUE;
736 }
737 }
738 else if( bInString && ch == '\\' )
739 {
740 const auto nextCh = pszContent[1];
741 if( nextCh == 'n' )
742 {
743 ADD_CHAR(szToken, '\n');
744 pszContent ++;
745 }
746 else if( nextCh == 'r' )
747 {
748 ADD_CHAR(szToken, '\r');
749 pszContent ++;
750 }
751 else if( nextCh == 't' )
752 {
753 ADD_CHAR(szToken, '\t');
754 pszContent ++;
755 }
756 else if( nextCh == 'b' )
757 {
758 ADD_CHAR(szToken, '\b');
759 pszContent ++;
760 }
761 else if( nextCh == '(' || nextCh == ')' || nextCh == '\\' )
762 {
763 ADD_CHAR(szToken, nextCh);
764 pszContent ++;
765 }
766 else if( nextCh >= '0' && nextCh <= '7' &&
767 pszContent[2] >= '0' && pszContent[2] <= '7' &&
768 pszContent[3] >= '0' && pszContent[3] <= '7' )
769 {
770 ADD_CHAR(szToken,
771 ((nextCh - '\0') * 64 + (pszContent[2] - '\0') * 8 + pszContent[3] - '\0'));
772 pszContent += 3;
773 }
774 else if( nextCh == '\n' )
775 {
776 if( pszContent[2] == '\r' )
777 pszContent += 2;
778 else
779 pszContent ++;
780 }
781 else if( nextCh == '\r' )
782 {
783 pszContent ++;
784 }
785 }
786 else if (ch == '<' && pszContent[1] == '<' && nTokenSize == 0)
787 {
788 int nDictDepth = 0;
789
790 while(*pszContent != '\0')
791 {
792 if (pszContent[0] == '<' && pszContent[1] == '<')
793 {
794 ADD_CHAR(szToken, '<');
795 ADD_CHAR(szToken, '<');
796 nDictDepth ++;
797 pszContent += 2;
798 }
799 else if (pszContent[0] == '>' && pszContent[1] == '>')
800 {
801 ADD_CHAR(szToken, '>');
802 ADD_CHAR(szToken, '>');
803 nDictDepth --;
804 pszContent += 2;
805 if (nDictDepth == 0)
806 break;
807 }
808 else
809 {
810 ADD_CHAR(szToken, *pszContent);
811 pszContent ++;
812 }
813 }
814 if (nDictDepth == 0)
815 {
816 bPushToken = TRUE;
817 pszContent --;
818 }
819 else
820 break;
821 }
822 else
823 {
824 // Do not create too long tokens in arrays, that we will ignore
825 // anyway
826 if( nArrayLevel == 0 || nTokenSize == 0 )
827 {
828 ADD_CHAR(szToken, ch);
829 }
830 }
831
832 pszContent ++;
833 if (pszContent[0] == '\0')
834 bPushToken = TRUE;
835
836 #define EQUAL1(szToken, s) (szToken[0] == s[0] && szToken[1] == '\0')
837 #define EQUAL2(szToken, s) (szToken[0] == s[0] && szToken[1] == s[1] && szToken[2] == '\0')
838 #define EQUAL3(szToken, s) (szToken[0] == s[0] && szToken[1] == s[1] && szToken[2] == s[2] && szToken[3] == '\0')
839
840 if (bPushToken && nTokenSize)
841 {
842 if (EQUAL2(szToken, "BI"))
843 {
844 while(*pszContent != '\0')
845 {
846 if( pszContent[0] == 'E' && pszContent[1] == 'I' && pszContent[2] == ' ' )
847 {
848 break;
849 }
850 pszContent ++;
851 }
852 if( pszContent[0] == 'E' )
853 pszContent += 3;
854 else
855 return nullptr;
856 }
857 else if (EQUAL3(szToken, "BDC"))
858 {
859 if (nTokenStackSize < 2)
860 {
861 CPLDebug("PDF",
862 "not enough arguments for %s",
863 szToken);
864 return nullptr;
865 }
866 nTokenStackSize -= 2;
867 const char* pszOC = aszTokenStack[nTokenStackSize];
868 const char* pszOCGName = aszTokenStack[nTokenStackSize+1];
869
870 nBDCLevel ++;
871
872 if( EQUAL3(pszOC, "/OC") && pszOCGName[0] == '/' )
873 {
874 std::map<CPLString, OGRPDFLayer*>::iterator oIter =
875 oMapPropertyToLayer.find(pszOCGName + 1);
876 if( oIter != oMapPropertyToLayer.end() )
877 {
878 poCurLayer = oIter->second;
879 //CPLDebug("PDF", "Cur layer : %s", poCurLayer->GetName());
880 }
881 }
882
883 oLayerStack.push(poCurLayer);
884 //CPLDebug("PDF", "%s %s BDC", osOC.c_str(), osOCGName.c_str());
885 }
886 else if (EQUAL3(szToken, "EMC"))
887 {
888 //CPLDebug("PDF", "EMC");
889 if( !oLayerStack.empty() )
890 {
891 oLayerStack.pop();
892 if( !oLayerStack.empty() )
893 poCurLayer = oLayerStack.top();
894 else
895 poCurLayer = nullptr;
896
897 /*if (poCurLayer)
898 {
899 CPLDebug("PDF", "Cur layer : %s", poCurLayer->GetName());
900 }*/
901 }
902 else
903 {
904 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
905 poCurLayer = nullptr;
906 //return NULL;
907 }
908
909 nBDCLevel --;
910 if (nBDCLevel == 0 && bInitBDCStack)
911 break;
912 }
913
914 /* Ignore any text stuff */
915 else if (EQUAL2(szToken, "BT"))
916 nBTLevel ++;
917 else if (EQUAL2(szToken, "ET"))
918 {
919 nBTLevel --;
920 if (nBTLevel < 0)
921 {
922 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
923 return nullptr;
924 }
925 }
926 else if (!nArrayLevel && !nBTLevel)
927 {
928 int bEmitFeature = FALSE;
929
930 if( szToken[0] < 'A' )
931 {
932 PUSH(aszTokenStack, szToken, nTokenSize);
933 }
934 else if (EQUAL1(szToken, "q"))
935 {
936 oGSStack.push(oGS);
937 }
938 else if (EQUAL1(szToken, "Q"))
939 {
940 if (oGSStack.empty())
941 {
942 CPLDebug("PDF", "not enough arguments for %s", szToken);
943 return nullptr;
944 }
945
946 oGS = oGSStack.top();
947 oGSStack.pop();
948
949 if (oGSStack.empty() && bMatchQ)
950 break;
951 }
952 else if (EQUAL2(szToken, "cm"))
953 {
954 double adfMatrix[6];
955 if (!UnstackTokens(szToken, 6, aszTokenStack, nTokenStackSize, adfMatrix))
956 {
957 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
958 return nullptr;
959 }
960
961 oGS.MultiplyBy(adfMatrix);
962 }
963 else if (EQUAL1(szToken, "b") || /* closepath, fill, stroke */
964 EQUAL2(szToken, "b*") /* closepath, eofill, stroke */)
965 {
966 if (!(!oCoords.empty() &&
967 oCoords[oCoords.size() - 2] == CLOSE_SUBPATH &&
968 oCoords.back() == CLOSE_SUBPATH))
969 {
970 oCoords.push_back(CLOSE_SUBPATH);
971 oCoords.push_back(CLOSE_SUBPATH);
972 }
973 oCoords.push_back(FILL_SUBPATH);
974 oCoords.push_back(FILL_SUBPATH);
975 bHasFoundFill = TRUE;
976
977 bEmitFeature = TRUE;
978 }
979 else if (EQUAL1(szToken, "B") || /* fill, stroke */
980 EQUAL2(szToken, "B*") || /* eofill, stroke */
981 EQUAL1(szToken, "f") || /* fill */
982 EQUAL1(szToken, "F") || /* fill */
983 EQUAL2(szToken, "f*") /* eofill */ )
984 {
985 oCoords.push_back(FILL_SUBPATH);
986 oCoords.push_back(FILL_SUBPATH);
987 bHasFoundFill = TRUE;
988
989 bEmitFeature = TRUE;
990 }
991 else if (EQUAL1(szToken, "h")) /* close subpath */
992 {
993 if (!(!oCoords.empty() &&
994 oCoords[oCoords.size() - 2] == CLOSE_SUBPATH &&
995 oCoords.back() == CLOSE_SUBPATH))
996 {
997 oCoords.push_back(CLOSE_SUBPATH);
998 oCoords.push_back(CLOSE_SUBPATH);
999 }
1000 }
1001 else if (EQUAL1(szToken, "n")) /* new subpath without stroking or filling */
1002 {
1003 oCoords.resize(0);
1004 }
1005 else if (EQUAL1(szToken, "s")) /* close and stroke */
1006 {
1007 if (!(!oCoords.empty() &&
1008 oCoords[oCoords.size() - 2] == CLOSE_SUBPATH &&
1009 oCoords.back() == CLOSE_SUBPATH))
1010 {
1011 oCoords.push_back(CLOSE_SUBPATH);
1012 oCoords.push_back(CLOSE_SUBPATH);
1013 }
1014
1015 bEmitFeature = TRUE;
1016 }
1017 else if (EQUAL1(szToken, "S")) /* stroke */
1018 {
1019 bEmitFeature = TRUE;
1020 }
1021 else if (EQUAL1(szToken, "m") || EQUAL1(szToken, "l"))
1022 {
1023 double adfCoords[2];
1024 if (!UnstackTokens(szToken, 2, aszTokenStack, nTokenStackSize, adfCoords))
1025 {
1026 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1027 return nullptr;
1028 }
1029
1030 if (EQUAL1(szToken, "m"))
1031 {
1032 if (!oCoords.empty())
1033 bHasMultiPart = TRUE;
1034 oCoords.push_back(NEW_SUBPATH);
1035 oCoords.push_back(NEW_SUBPATH);
1036 }
1037
1038 oGS.ApplyMatrix(adfCoords);
1039 oCoords.push_back(adfCoords[0]);
1040 oCoords.push_back(adfCoords[1]);
1041 }
1042 else if (EQUAL1(szToken, "c")) /* Bezier curve */
1043 {
1044 double adfCoords[6];
1045 if (!UnstackTokens(szToken, 6, aszTokenStack, nTokenStackSize, adfCoords))
1046 {
1047 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1048 return nullptr;
1049 }
1050
1051 oGS.ApplyMatrix(adfCoords + 0);
1052 oGS.ApplyMatrix(adfCoords + 2);
1053 oGS.ApplyMatrix(adfCoords + 4);
1054 AddBezierCurve(oCoords,
1055 oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1056 &adfCoords[0],
1057 &adfCoords[2],
1058 &adfCoords[4]);
1059 }
1060 else if (EQUAL1(szToken, "v")) /* Bezier curve */
1061 {
1062 double adfCoords[4];
1063 if (!UnstackTokens(szToken, 4, aszTokenStack, nTokenStackSize, adfCoords))
1064 {
1065 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1066 return nullptr;
1067 }
1068
1069 oGS.ApplyMatrix(adfCoords + 0);
1070 oGS.ApplyMatrix(adfCoords + 2);
1071 AddBezierCurve(oCoords,
1072 oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1073 oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1074 &adfCoords[0],
1075 &adfCoords[2]);
1076 }
1077 else if (EQUAL1(szToken, "y")) /* Bezier curve */
1078 {
1079 double adfCoords[4];
1080 if (!UnstackTokens(szToken, 4, aszTokenStack, nTokenStackSize, adfCoords))
1081 {
1082 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1083 return nullptr;
1084 }
1085
1086 oGS.ApplyMatrix(adfCoords + 0);
1087 oGS.ApplyMatrix(adfCoords + 2);
1088 AddBezierCurve(oCoords,
1089 oCoords.empty() ? &adfCoords[0] : &oCoords[oCoords.size()-2],
1090 &adfCoords[0],
1091 &adfCoords[2],
1092 &adfCoords[2]);
1093 }
1094 else if (EQUAL2(szToken, "re")) /* Rectangle */
1095 {
1096 double adfCoords[4];
1097 if (!UnstackTokens(szToken, 4, aszTokenStack, nTokenStackSize, adfCoords))
1098 {
1099 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1100 return nullptr;
1101 }
1102
1103 adfCoords[2] += adfCoords[0];
1104 adfCoords[3] += adfCoords[1];
1105
1106 oGS.ApplyMatrix(adfCoords);
1107 oGS.ApplyMatrix(adfCoords + 2);
1108
1109 if (!oCoords.empty())
1110 bHasMultiPart = TRUE;
1111 oCoords.push_back(NEW_SUBPATH);
1112 oCoords.push_back(NEW_SUBPATH);
1113 oCoords.push_back(adfCoords[0]);
1114 oCoords.push_back(adfCoords[1]);
1115 oCoords.push_back(adfCoords[2]);
1116 oCoords.push_back(adfCoords[1]);
1117 oCoords.push_back(adfCoords[2]);
1118 oCoords.push_back(adfCoords[3]);
1119 oCoords.push_back(adfCoords[0]);
1120 oCoords.push_back(adfCoords[3]);
1121 oCoords.push_back(CLOSE_SUBPATH);
1122 oCoords.push_back(CLOSE_SUBPATH);
1123 }
1124
1125 else if (EQUAL2(szToken, "Do"))
1126 {
1127 if (nTokenStackSize == 0)
1128 {
1129 CPLDebug("PDF",
1130 "not enough arguments for %s",
1131 szToken);
1132 return nullptr;
1133 }
1134
1135 CPLString osObjectName = aszTokenStack[--nTokenStackSize];
1136
1137 if (osObjectName[0] != '/')
1138 {
1139 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1140 return nullptr;
1141 }
1142
1143 if (poResources == nullptr)
1144 {
1145 if (osObjectName.find("/SymImage") == 0)
1146 {
1147 oCoords.push_back(oGS.adfCM[4] + oGS.adfCM[0] / 2);
1148 oCoords.push_back(oGS.adfCM[5] + oGS.adfCM[3] / 2);
1149
1150 szToken[0] = '\0';
1151 nTokenSize = 0;
1152
1153 if( poCurLayer != nullptr)
1154 bEmitFeature = TRUE;
1155 else
1156 continue;
1157 }
1158 else
1159 {
1160 //CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1161 return nullptr;
1162 }
1163 }
1164
1165 if( !bEmitFeature )
1166 {
1167 GDALPDFObject* poXObject =
1168 poResources->GetDictionary()->Get("XObject");
1169 if (poXObject == nullptr ||
1170 poXObject->GetType() != PDFObjectType_Dictionary)
1171 {
1172 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1173 return nullptr;
1174 }
1175
1176 GDALPDFObject* poObject =
1177 poXObject->GetDictionary()->Get(osObjectName.c_str() + 1);
1178 if (poObject == nullptr)
1179 {
1180 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1181 return nullptr;
1182 }
1183
1184 int bParseStream = TRUE;
1185 /* Check if the object is an image. If so, no need to try to parse */
1186 /* it. */
1187 if (poObject->GetType() == PDFObjectType_Dictionary)
1188 {
1189 GDALPDFObject* poSubtype = poObject->GetDictionary()->Get("Subtype");
1190 if (poSubtype != nullptr &&
1191 poSubtype->GetType() == PDFObjectType_Name &&
1192 poSubtype->GetName() == "Image" )
1193 {
1194 bParseStream = FALSE;
1195 }
1196 }
1197
1198 if( bParseStream )
1199 {
1200 GDALPDFStream* poStream = poObject->GetStream();
1201 if (!poStream)
1202 {
1203 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1204 return nullptr;
1205 }
1206
1207 char* pszStr = poStream->GetBytes();
1208 if( pszStr )
1209 {
1210 OGRGeometry* poGeom = ParseContent(pszStr, nullptr, FALSE, FALSE,
1211 oMapPropertyToLayer, poCurLayer);
1212 CPLFree(pszStr);
1213 if (poGeom && !bCollectAllObjects)
1214 return poGeom;
1215 delete poGeom;
1216 }
1217 }
1218 }
1219 }
1220 else if( EQUAL2(szToken, "RG") || EQUAL2(szToken, "rg") )
1221 {
1222 double* padf = ( EQUAL2(szToken, "RG") ) ? &oGS.adfStrokeColor[0] : &oGS.adfFillColor[0];
1223 if (!UnstackTokens(szToken, 3, aszTokenStack, nTokenStackSize, padf))
1224 {
1225 CPLDebug("PDF", "Should not happen at line %d", __LINE__);
1226 return nullptr;
1227 }
1228 }
1229 else if (oMapOperators.find(szToken) != oMapOperators.end())
1230 {
1231 int nArgs = oMapOperators[szToken];
1232 if (nArgs < 0)
1233 {
1234 while( nTokenStackSize != 0 )
1235 {
1236 CPLString osTopToken = aszTokenStack[--nTokenStackSize];
1237 if (oMapOperators.find(osTopToken) != oMapOperators.end())
1238 break;
1239 }
1240 }
1241 else
1242 {
1243 if( nArgs > nTokenStackSize )
1244 {
1245 CPLDebug("PDF",
1246 "not enough arguments for %s",
1247 szToken);
1248 return nullptr;
1249 }
1250 nTokenStackSize -= nArgs;
1251 }
1252 }
1253 else
1254 {
1255 PUSH(aszTokenStack, szToken, nTokenSize);
1256 }
1257
1258 if( bEmitFeature && poCurLayer != nullptr)
1259 {
1260 OGRGeometry* poGeom = BuildGeometry(oCoords, bHasFoundFill, bHasMultiPart);
1261 bHasFoundFill = FALSE;
1262 bHasMultiPart = FALSE;
1263 if (poGeom)
1264 {
1265 OGRFeature* poFeature = new OGRFeature(poCurLayer->GetLayerDefn());
1266 if( bSetStyle )
1267 {
1268 OGRwkbGeometryType eType = wkbFlatten(poGeom->getGeometryType());
1269 if( eType == wkbLineString || eType == wkbMultiLineString )
1270 {
1271 poFeature->SetStyleString(CPLSPrintf("PEN(c:#%02X%02X%02X)",
1272 (int)(oGS.adfStrokeColor[0] * 255 + 0.5),
1273 (int)(oGS.adfStrokeColor[1] * 255 + 0.5),
1274 (int)(oGS.adfStrokeColor[2] * 255 + 0.5)));
1275 }
1276 else if( eType == wkbPolygon || eType == wkbMultiPolygon )
1277 {
1278 poFeature->SetStyleString(CPLSPrintf("PEN(c:#%02X%02X%02X);BRUSH(fc:#%02X%02X%02X)",
1279 (int)(oGS.adfStrokeColor[0] * 255 + 0.5),
1280 (int)(oGS.adfStrokeColor[1] * 255 + 0.5),
1281 (int)(oGS.adfStrokeColor[2] * 255 + 0.5),
1282 (int)(oGS.adfFillColor[0] * 255 + 0.5),
1283 (int)(oGS.adfFillColor[1] * 255 + 0.5),
1284 (int)(oGS.adfFillColor[2] * 255 + 0.5)));
1285 }
1286 }
1287 poGeom->assignSpatialReference(poCurLayer->GetSpatialRef());
1288 poFeature->SetGeometryDirectly(poGeom);
1289 CPL_IGNORE_RET_VAL(poCurLayer->CreateFeature(poFeature));
1290 delete poFeature;
1291 }
1292
1293 oCoords.resize(0);
1294 }
1295 }
1296
1297 szToken[0] = '\0';
1298 nTokenSize = 0;
1299 }
1300 }
1301
1302 if (nTokenStackSize != 0)
1303 {
1304 while(nTokenStackSize != 0)
1305 {
1306 nTokenStackSize--;
1307 CPLDebug("PDF",
1308 "Remaining values in stack : %s",
1309 aszTokenStack[nTokenStackSize]);
1310 }
1311 return nullptr;
1312 }
1313
1314 if (bCollectAllObjects)
1315 return nullptr;
1316
1317 return BuildGeometry(oCoords, bHasFoundFill, bHasMultiPart);
1318 }
1319
1320 /************************************************************************/
1321 /* BuildGeometry() */
1322 /************************************************************************/
1323
BuildGeometry(std::vector<double> & oCoords,int bHasFoundFill,int bHasMultiPart)1324 OGRGeometry* PDFDataset::BuildGeometry(std::vector<double>& oCoords,
1325 int bHasFoundFill,
1326 int bHasMultiPart)
1327 {
1328 OGRGeometry* poGeom = nullptr;
1329
1330 if (!oCoords.size())
1331 return nullptr;
1332
1333 if (oCoords.size() == 2)
1334 {
1335 double X, Y;
1336 PDFCoordsToSRSCoords(oCoords[0], oCoords[1], X, Y);
1337 poGeom = new OGRPoint(X, Y);
1338 }
1339 else if (!bHasFoundFill)
1340 {
1341 OGRLineString* poLS = nullptr;
1342 OGRMultiLineString* poMLS = nullptr;
1343 if (bHasMultiPart)
1344 {
1345 poMLS = new OGRMultiLineString();
1346 poGeom = poMLS;
1347 }
1348
1349 for(size_t i=0;i<oCoords.size();i+=2)
1350 {
1351 if (oCoords[i] == NEW_SUBPATH && oCoords[i+1] == NEW_SUBPATH)
1352 {
1353 if (poMLS)
1354 {
1355 poLS = new OGRLineString();
1356 poMLS->addGeometryDirectly(poLS);
1357 }
1358 else
1359 {
1360 delete poLS;
1361 poLS = new OGRLineString();
1362 poGeom = poLS;
1363 }
1364 }
1365 else if (oCoords[i] == CLOSE_SUBPATH && oCoords[i+1] == CLOSE_SUBPATH)
1366 {
1367 if (poLS && poLS->getNumPoints() >= 2 &&
1368 !(poLS->getX(0) == poLS->getX(poLS->getNumPoints()-1) &&
1369 poLS->getY(0) == poLS->getY(poLS->getNumPoints()-1)))
1370 {
1371 poLS->addPoint(poLS->getX(0), poLS->getY(0));
1372 }
1373 }
1374 else if (oCoords[i] == FILL_SUBPATH && oCoords[i+1] == FILL_SUBPATH)
1375 {
1376 /* Should not happen */
1377 }
1378 else
1379 {
1380 if (poLS)
1381 {
1382 double X, Y;
1383 PDFCoordsToSRSCoords(oCoords[i], oCoords[i+1], X, Y);
1384
1385 poLS->addPoint(X, Y);
1386 }
1387 }
1388 }
1389
1390 // Recognize points as written by GDAL (ogr-sym-2 : circle (not filled))
1391 OGRGeometry* poCenter = nullptr;
1392 if (poCenter == nullptr && poLS != nullptr && poLS->getNumPoints() == 1 + BEZIER_STEPS * 4 )
1393 {
1394 poCenter = PDFGetCircleCenter(poLS);
1395 }
1396
1397 // Recognize points as written by GDAL (ogr-sym-4: square (not filled))
1398 if (poCenter == nullptr && poLS != nullptr && (poLS->getNumPoints() == 4 || poLS->getNumPoints() == 5))
1399 {
1400 poCenter = PDFGetSquareCenter(poLS);
1401 }
1402
1403 // Recognize points as written by GDAL (ogr-sym-6: triangle (not filled))
1404 if (poCenter == nullptr && poLS != nullptr && (poLS->getNumPoints() == 3 || poLS->getNumPoints() == 4))
1405 {
1406 poCenter = PDFGetTriangleCenter(poLS);
1407 }
1408
1409 // Recognize points as written by GDAL (ogr-sym-8: star (not filled))
1410 if (poCenter == nullptr && poLS != nullptr && (poLS->getNumPoints() == 10 || poLS->getNumPoints() == 11))
1411 {
1412 poCenter = PDFGetStarCenter(poLS);
1413 }
1414
1415 if (poCenter == nullptr && poMLS != nullptr && poMLS->getNumGeometries() == 2)
1416 {
1417 const OGRLineString* poLS1 = poMLS->getGeometryRef(0);
1418 const OGRLineString* poLS2 = poMLS->getGeometryRef(1);
1419
1420 // Recognize points as written by GDAL (ogr-sym-0: cross (+) ).
1421 if (poLS1->getNumPoints() == 2 && poLS2->getNumPoints() == 2 &&
1422 poLS1->getY(0) == poLS1->getY(1) &&
1423 poLS2->getX(0) == poLS2->getX(1) &&
1424 fabs(fabs(poLS1->getX(0) - poLS1->getX(1)) - fabs(poLS2->getY(0) - poLS2->getY(1))) < EPSILON &&
1425 fabs((poLS1->getX(0) + poLS1->getX(1)) / 2 - poLS2->getX(0)) < EPSILON &&
1426 fabs((poLS2->getY(0) + poLS2->getY(1)) / 2 - poLS1->getY(0)) < EPSILON)
1427 {
1428 poCenter = new OGRPoint(poLS2->getX(0), poLS1->getY(0));
1429 }
1430 // Recognize points as written by GDAL (ogr-sym-1: diagcross (X) ).
1431 else if (poLS1->getNumPoints() == 2 && poLS2->getNumPoints() == 2 &&
1432 poLS1->getX(0) == poLS2->getX(0) &&
1433 poLS1->getY(0) == poLS2->getY(1) &&
1434 poLS1->getX(1) == poLS2->getX(1) &&
1435 poLS1->getY(1) == poLS2->getY(0) &&
1436 fabs(fabs(poLS1->getX(0) - poLS1->getX(1)) - fabs(poLS1->getY(0) - poLS1->getY(1))) < EPSILON)
1437 {
1438 poCenter = new OGRPoint((poLS1->getX(0) + poLS1->getX(1)) / 2,
1439 (poLS1->getY(0) + poLS1->getY(1)) / 2);
1440 }
1441 }
1442
1443 if (poCenter)
1444 {
1445 delete poGeom;
1446 poGeom = poCenter;
1447 }
1448 }
1449 else
1450 {
1451 OGRLinearRing* poLS = nullptr;
1452 int nPolys = 0;
1453 OGRGeometry** papoPoly = nullptr;
1454
1455 for(size_t i=0;i<oCoords.size();i+=2)
1456 {
1457 if (oCoords[i] == NEW_SUBPATH && oCoords[i+1] == NEW_SUBPATH)
1458 {
1459 if (poLS && poLS->getNumPoints() >= 3)
1460 {
1461 OGRPolygon* poPoly = new OGRPolygon();
1462 poPoly->addRingDirectly(poLS);
1463 poLS = nullptr;
1464
1465 papoPoly = (OGRGeometry**) CPLRealloc(papoPoly, (nPolys + 1) * sizeof(OGRGeometry*));
1466 papoPoly[nPolys ++] = poPoly;
1467 }
1468 delete poLS;
1469 poLS = new OGRLinearRing();
1470 }
1471 else if ((oCoords[i] == CLOSE_SUBPATH && oCoords[i+1] == CLOSE_SUBPATH) ||
1472 (oCoords[i] == FILL_SUBPATH && oCoords[i+1] == FILL_SUBPATH))
1473 {
1474 if (poLS)
1475 {
1476 poLS->closeRings();
1477
1478 std::unique_ptr<OGRPoint> poCenter;
1479
1480 if (nPolys == 0 &&
1481 poLS &&
1482 poLS->getNumPoints() == 1 + BEZIER_STEPS * 4 )
1483 {
1484 // Recognize points as written by GDAL (ogr-sym-3 : circle (filled))
1485 poCenter.reset(PDFGetCircleCenter(poLS));
1486 }
1487
1488 if (nPolys == 0 &&
1489 poCenter == nullptr &&
1490 poLS &&
1491 poLS->getNumPoints() == 5)
1492 {
1493 // Recognize points as written by GDAL (ogr-sym-5: square (filled))
1494 poCenter.reset(PDFGetSquareCenter(poLS));
1495
1496 /* ESRI points */
1497 if (poCenter == nullptr &&
1498 oCoords.size() == 14 &&
1499 poLS->getY(0) == poLS->getY(1) &&
1500 poLS->getX(1) == poLS->getX(2) &&
1501 poLS->getY(2) == poLS->getY(3) &&
1502 poLS->getX(3) == poLS->getX(0))
1503 {
1504 poCenter.reset(new OGRPoint((poLS->getX(0) + poLS->getX(1)) / 2,
1505 (poLS->getY(0) + poLS->getY(2)) / 2));
1506 }
1507 }
1508 // Recognize points as written by GDAL (ogr-sym-7: triangle (filled))
1509 else if (nPolys == 0 &&
1510 poLS &&
1511 poLS->getNumPoints() == 4)
1512 {
1513 poCenter.reset(PDFGetTriangleCenter(poLS));
1514 }
1515 // Recognize points as written by GDAL (ogr-sym-9: star (filled))
1516 else if (nPolys == 0 &&
1517 poLS &&
1518 poLS->getNumPoints() == 11)
1519 {
1520 poCenter.reset(PDFGetStarCenter(poLS));
1521 }
1522
1523 if (poCenter)
1524 {
1525 delete poGeom;
1526 poGeom = poCenter.release();
1527 break;
1528 }
1529
1530 if (poLS->getNumPoints() >= 3)
1531 {
1532 OGRPolygon* poPoly = new OGRPolygon();
1533 poPoly->addRingDirectly(poLS);
1534 poLS = nullptr;
1535
1536 papoPoly = (OGRGeometry**) CPLRealloc(papoPoly, (nPolys + 1) * sizeof(OGRGeometry*));
1537 papoPoly[nPolys ++] = poPoly;
1538 }
1539 else
1540 {
1541 delete poLS;
1542 poLS = nullptr;
1543 }
1544 }
1545 }
1546 else
1547 {
1548 if (poLS)
1549 {
1550 double X, Y;
1551 PDFCoordsToSRSCoords(oCoords[i], oCoords[i+1], X, Y);
1552
1553 poLS->addPoint(X, Y);
1554 }
1555 }
1556 }
1557
1558 delete poLS;
1559
1560 int bIsValidGeometry;
1561 if (nPolys == 2 &&
1562 papoPoly[0]->toPolygon()->getNumInteriorRings() == 0 &&
1563 papoPoly[1]->toPolygon()->getNumInteriorRings() == 0)
1564 {
1565 OGRLinearRing* poRing0 = papoPoly[0]->toPolygon()->getExteriorRing();
1566 OGRLinearRing* poRing1 = papoPoly[1]->toPolygon()->getExteriorRing();
1567 if (poRing0->getNumPoints() == poRing1->getNumPoints())
1568 {
1569 int bSameRing = TRUE;
1570 for(int i=0;i<poRing0->getNumPoints();i++)
1571 {
1572 if (poRing0->getX(i) != poRing1->getX(i))
1573 {
1574 bSameRing = FALSE;
1575 break;
1576 }
1577 if (poRing0->getY(i) != poRing1->getY(i))
1578 {
1579 bSameRing = FALSE;
1580 break;
1581 }
1582 }
1583
1584 /* Just keep on ring if they are identical */
1585 if (bSameRing)
1586 {
1587 delete papoPoly[1];
1588 nPolys = 1;
1589 }
1590 }
1591 }
1592 if (nPolys)
1593 {
1594 poGeom = OGRGeometryFactory::organizePolygons(
1595 papoPoly, nPolys, &bIsValidGeometry, nullptr);
1596 }
1597 CPLFree(papoPoly);
1598 }
1599
1600 return poGeom;
1601 }
1602
1603 /************************************************************************/
1604 /* ExploreContents() */
1605 /************************************************************************/
1606
ExploreContents(GDALPDFObject * poObj,GDALPDFObject * poResources,int nDepth,int & nVisited,bool & bStop)1607 void PDFDataset::ExploreContents(GDALPDFObject* poObj,
1608 GDALPDFObject* poResources,
1609 int nDepth,
1610 int& nVisited,
1611 bool& bStop)
1612 {
1613 std::map<CPLString, OGRPDFLayer*> oMapPropertyToLayer;
1614 if( nDepth == 10 || nVisited == 1000 )
1615 {
1616 CPLError(CE_Failure, CPLE_AppDefined,
1617 "ExploreContents(): too deep exploration or too many items");
1618 bStop = true;
1619 return;
1620 }
1621 if( bStop )
1622 return;
1623
1624 if (poObj->GetType() == PDFObjectType_Array)
1625 {
1626 GDALPDFArray* poArray = poObj->GetArray();
1627 for(int i=0;i<poArray->GetLength();i++)
1628 {
1629 GDALPDFObject* poSubObj = poArray->Get(i);
1630 if( poSubObj )
1631 {
1632 nVisited ++;
1633 ExploreContents(poSubObj, poResources, nDepth + 1, nVisited, bStop);
1634 if( bStop )
1635 return;
1636 }
1637 }
1638 }
1639
1640 if (poObj->GetType() != PDFObjectType_Dictionary)
1641 return;
1642
1643 GDALPDFStream* poStream = poObj->GetStream();
1644 if (!poStream)
1645 return;
1646
1647 char* pszStr = poStream->GetBytes();
1648 if (!pszStr)
1649 return;
1650
1651 const char* pszMCID = (const char*) pszStr;
1652 while((pszMCID = strstr(pszMCID, "/MCID")) != nullptr)
1653 {
1654 const char* pszBDC = strstr(pszMCID, "BDC");
1655 if (pszBDC)
1656 {
1657 /* Hack for http://www.avenza.com/sites/default/files/spatialpdf/US_County_Populations.pdf */
1658 /* FIXME: that logic is too fragile. */
1659 const char* pszStartParsing = pszBDC;
1660 const char* pszAfterBDC = pszBDC + 3;
1661 int bMatchQ = FALSE;
1662 while (pszAfterBDC[0] == ' ' || pszAfterBDC[0] == '\r' || pszAfterBDC[0] == '\n')
1663 pszAfterBDC ++;
1664 if (STARTS_WITH(pszAfterBDC, "0 0 m"))
1665 {
1666 const char* pszLastq = pszBDC;
1667 while(pszLastq > pszStr && *pszLastq != 'q')
1668 pszLastq --;
1669
1670 if (pszLastq > pszStr && *pszLastq == 'q' &&
1671 (pszLastq[-1] == ' ' || pszLastq[-1] == '\r' || pszLastq[-1] == '\n') &&
1672 (pszLastq[1] == ' ' || pszLastq[1] == '\r' || pszLastq[1] == '\n'))
1673 {
1674 pszStartParsing = pszLastq;
1675 bMatchQ = TRUE;
1676 }
1677 }
1678
1679 int nMCID = atoi(pszMCID + 6);
1680 if (GetGeometryFromMCID(nMCID) == nullptr)
1681 {
1682 OGRGeometry* poGeom = ParseContent(pszStartParsing, poResources,
1683 !bMatchQ, bMatchQ, oMapPropertyToLayer, nullptr);
1684 if( poGeom != nullptr )
1685 {
1686 /* Save geometry in map */
1687 oMapMCID[nMCID] = poGeom;
1688 }
1689 }
1690 }
1691 pszMCID += 5;
1692 }
1693 CPLFree(pszStr);
1694 }
1695
1696 /************************************************************************/
1697 /* ExploreContentsNonStructured() */
1698 /************************************************************************/
1699
ExploreContentsNonStructuredInternal(GDALPDFObject * poContents,GDALPDFObject * poResources,std::map<CPLString,OGRPDFLayer * > & oMapPropertyToLayer,OGRPDFLayer * poSingleLayer)1700 void PDFDataset::ExploreContentsNonStructuredInternal(GDALPDFObject* poContents,
1701 GDALPDFObject* poResources,
1702 std::map<CPLString, OGRPDFLayer*>& oMapPropertyToLayer,
1703 OGRPDFLayer* poSingleLayer)
1704 {
1705 if (poContents->GetType() == PDFObjectType_Array)
1706 {
1707 GDALPDFArray* poArray = poContents->GetArray();
1708 char* pszConcatStr = nullptr;
1709 int nConcatLen = 0;
1710 for(int i=0;i<poArray->GetLength();i++)
1711 {
1712 GDALPDFObject* poObj = poArray->Get(i);
1713 if( poObj == nullptr || poObj->GetType() != PDFObjectType_Dictionary)
1714 break;
1715 GDALPDFStream* poStream = poObj->GetStream();
1716 if (!poStream)
1717 break;
1718 char* pszStr = poStream->GetBytes();
1719 if (!pszStr)
1720 break;
1721 int nLen = (int)strlen(pszStr);
1722 char* pszConcatStrNew = (char*)CPLRealloc(pszConcatStr, nConcatLen + nLen + 1);
1723 if( pszConcatStrNew == nullptr )
1724 {
1725 CPLFree(pszStr);
1726 break;
1727 }
1728 pszConcatStr = pszConcatStrNew;
1729 memcpy(pszConcatStr + nConcatLen, pszStr, nLen+1);
1730 nConcatLen += nLen;
1731 CPLFree(pszStr);
1732 }
1733 if( pszConcatStr )
1734 ParseContent(pszConcatStr, poResources, FALSE, FALSE, oMapPropertyToLayer, poSingleLayer);
1735 CPLFree(pszConcatStr);
1736 return;
1737 }
1738
1739 if (poContents->GetType() != PDFObjectType_Dictionary)
1740 return;
1741
1742 GDALPDFStream* poStream = poContents->GetStream();
1743 if (!poStream)
1744 return;
1745
1746 char* pszStr = poStream->GetBytes();
1747 if( !pszStr )
1748 return;
1749 ParseContent(pszStr, poResources, FALSE, FALSE, oMapPropertyToLayer, poSingleLayer);
1750 CPLFree(pszStr);
1751 }
1752
ExploreContentsNonStructured(GDALPDFObject * poContents,GDALPDFObject * poResources)1753 void PDFDataset::ExploreContentsNonStructured(GDALPDFObject* poContents,
1754 GDALPDFObject* poResources)
1755 {
1756 std::map<CPLString, OGRPDFLayer*> oMapPropertyToLayer;
1757 if (poResources != nullptr &&
1758 poResources->GetType() == PDFObjectType_Dictionary)
1759 {
1760 GDALPDFObject* poProperties =
1761 poResources->GetDictionary()->Get("Properties");
1762 if (poProperties != nullptr &&
1763 poProperties->GetType() == PDFObjectType_Dictionary)
1764 {
1765 std::map< std::pair<int, int>, OGRPDFLayer *> oMapNumGenToLayer;
1766 for(const auto& oLayerWithref: aoLayerWithRef )
1767 {
1768 CPLString osSanitizedName(PDFSanitizeLayerName(oLayerWithref.osName));
1769
1770 OGRPDFLayer* poLayer = (OGRPDFLayer*) GetLayerByName(osSanitizedName.c_str());
1771 if (poLayer == nullptr)
1772 {
1773 auto poSRSOri = GetSpatialRef();
1774 OGRSpatialReference* poSRS = poSRSOri ? poSRSOri->Clone() : nullptr;
1775 poLayer =
1776 new OGRPDFLayer(this, osSanitizedName.c_str(), poSRS, wkbUnknown);
1777 if( poSRS )
1778 poSRS->Release();
1779
1780 papoLayers = (OGRLayer**)
1781 CPLRealloc(papoLayers, (nLayers + 1) * sizeof(OGRLayer*));
1782 papoLayers[nLayers] = poLayer;
1783 nLayers ++;
1784 }
1785
1786 oMapNumGenToLayer[ std::pair<int,int>(oLayerWithref.nOCGNum.toInt(), oLayerWithref.nOCGGen) ] = poLayer;
1787 }
1788
1789 std::map<CPLString, GDALPDFObject*>& oMap =
1790 poProperties->GetDictionary()->GetValues();
1791 std::map<CPLString, GDALPDFObject*>::iterator oIter = oMap.begin();
1792 std::map<CPLString, GDALPDFObject*>::iterator oEnd = oMap.end();
1793
1794 for(; oIter != oEnd; ++oIter)
1795 {
1796 const char* pszKey = oIter->first.c_str();
1797 GDALPDFObject* poObj = oIter->second;
1798 if( poObj->GetRefNum().toBool() )
1799 {
1800 std::map< std::pair<int, int>, OGRPDFLayer *>::iterator
1801 oIterNumGenToLayer = oMapNumGenToLayer.find(
1802 std::pair<int,int>(poObj->GetRefNum().toInt(), poObj->GetRefGen()) );
1803 if( oIterNumGenToLayer != oMapNumGenToLayer.end() )
1804 {
1805 oMapPropertyToLayer[pszKey] = oIterNumGenToLayer->second;
1806 }
1807 }
1808 }
1809 }
1810 }
1811
1812 OGRPDFLayer* poSingleLayer = nullptr;
1813 if( nLayers == 0 )
1814 {
1815 if( CPLTestBool(CPLGetConfigOption("OGR_PDF_READ_NON_STRUCTURED", "NO")) )
1816 {
1817 OGRPDFLayer *poLayer =
1818 new OGRPDFLayer(this, "content", nullptr, wkbUnknown);
1819 papoLayers = (OGRLayer**)
1820 CPLRealloc(papoLayers, (nLayers + 1) * sizeof(OGRLayer*));
1821 papoLayers[nLayers] = poLayer;
1822 nLayers ++;
1823 poSingleLayer = poLayer;
1824 }
1825 else
1826 {
1827 return;
1828 }
1829 }
1830
1831 ExploreContentsNonStructuredInternal(poContents,
1832 poResources,
1833 oMapPropertyToLayer,
1834 poSingleLayer);
1835
1836 /* Remove empty layers */
1837 int i = 0;
1838 while(i < nLayers)
1839 {
1840 if (papoLayers[i]->GetFeatureCount() == 0)
1841 {
1842 delete papoLayers[i];
1843 if (i < nLayers - 1)
1844 {
1845 memmove(papoLayers + i, papoLayers + i + 1,
1846 (nLayers - 1 - i) * sizeof(OGRPDFLayer*));
1847 }
1848 nLayers --;
1849 }
1850 else
1851 i ++;
1852 }
1853 }
1854
1855 #endif /* HAVE_PDF_READ_SUPPORT */
1856