/******************************************************************************
 * $Id: ogr_osm.h 09a48d5214b089c224b3b7afed5beee254d45614 2021-08-15 12:04:53 +0200 Even Rouault $
 *
 * Project:  OpenGIS Simple Features Reference Implementation
 * Purpose:  Private definitions for OGR/OpenStreetMap driver.
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
 *
 ******************************************************************************
 * Copyright (c) 2012-2014, Even Rouault <even dot rouault at spatialys.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ****************************************************************************/
29 
#ifndef OGR_OSM_H_INCLUDED
#define OGR_OSM_H_INCLUDED

// replace O(log2(N)) complexity of FindNode() by O(1)
#define ENABLE_NODE_LOOKUP_BY_HASHING 1

#include "ogrsf_frmts.h"
#include "cpl_string.h"

#include <array>
#include <cstring>
#include <map>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include "osm_parser.h"

#include "ogrsqlitevfs.h"
48 
/**
 * Strict-weak-ordering functor for C strings.
 *
 * Orders two NUL-terminated strings by content (via strcmp()) rather than by
 * pointer value, so that it can be used as the comparator of ordered
 * containers keyed on `const char*`.
 *
 * Both arguments must be non-null: strcmp() is called on them unchecked.
 */
class ConstCharComp
{
  public:
    /** Returns true when string `a` sorts strictly before string `b`. */
    bool operator()(const char* a, const char* b) const
    {
        return strcmp(a, b) < 0;
    }
};
57 
58 class OGROSMComputedAttribute
59 {
60     public:
61         CPLString    osName;
62         int          nIndex;
63         OGRFieldType eType;
64         CPLString    osSQL;
65         sqlite3_stmt  *hStmt;
66         std::vector<CPLString> aosAttrToBind;
67         std::vector<int> anIndexToBind;
68         bool         bHardcodedZOrder;
69 
OGROSMComputedAttribute()70         OGROSMComputedAttribute() : nIndex(-1), eType(OFTString), hStmt(nullptr), bHardcodedZOrder(false) {}
OGROSMComputedAttribute(const char * pszName)71         explicit OGROSMComputedAttribute(const char* pszName) :
72                 osName(pszName), nIndex(-1), eType(OFTString), hStmt(nullptr), bHardcodedZOrder(false) {}
73 };
74 
/************************************************************************/
/*                           OGROSMLayer                                */
/************************************************************************/
78 
79 class OGROSMDataSource;
80 
81 class OGROSMLayer final: public OGRLayer
82 {
83     friend class OGROSMDataSource;
84 
85     OGROSMDataSource    *poDS;
86     int                  nIdxLayer;
87     OGRFeatureDefn      *poFeatureDefn;
88     OGRSpatialReference *poSRS;
89     long                 nFeatureCount;
90 
91     std::vector<char*>   apszNames; /* Needed to keep a "reference" to the string inserted into oMapFieldNameToIndex */
92     std::map<const char*, int, ConstCharComp> oMapFieldNameToIndex;
93 
94     std::vector<OGROSMComputedAttribute> oComputedAttributes;
95 
96     bool                 bResetReadingAllowed;
97 
98     int                  nFeatureArraySize;
99     int                  nFeatureArrayMaxSize;
100     int                  nFeatureArrayIndex;
101     OGRFeature**         papoFeatures;
102 
103     bool                  bHasOSMId;
104     int                   nIndexOSMId;
105     int                   nIndexOSMWayId;
106     bool                  bHasVersion;
107     bool                  bHasTimestamp;
108     bool                  bHasUID;
109     bool                  bHasUser;
110     bool                  bHasChangeset;
111     bool                  bHasOtherTags;
112     int                   nIndexOtherTags;
113     bool                  bHasAllTags;
114     int                   nIndexAllTags;
115 
116     bool                  bHasWarnedTooManyFeatures;
117 
118     char                 *pszAllTags;
119     bool                  bHasWarnedAllTagsTruncated;
120 
121     bool                  bUserInterested;
122 
123     bool                  AddToArray( OGRFeature* poFeature,
124                                       int bCheckFeatureThreshold );
125 
126     int                   AddInOtherOrAllTags(const char* pszK);
127 
128     char                  szLaunderedFieldName[256];
129     const char*           GetLaunderedFieldName(const char* pszName);
130 
131     std::vector<char*>    apszInsignificantKeys;
132     std::map<const char*, int, ConstCharComp> aoSetInsignificantKeys;
133 
134     std::vector<char*>    apszIgnoreKeys;
135     std::map<const char*, int, ConstCharComp> aoSetIgnoreKeys;
136 
137     std::set<std::string> aoSetWarnKeys;
138 
139   public:
140                         OGROSMLayer( OGROSMDataSource* poDS,
141                                      int nIdxLayer,
142                                      const char* pszName );
143     virtual             ~OGROSMLayer();
144 
GetLayerDefn()145     virtual OGRFeatureDefn *GetLayerDefn() override {return poFeatureDefn;}
146 
147     virtual void        ResetReading() override;
148     virtual int         TestCapability( const char * ) override;
149 
150     virtual OGRFeature *GetNextFeature() override;
151 
152     OGRFeature*         MyGetNextFeature( OGROSMLayer** ppoNewCurLayer,
153                                           GDALProgressFunc pfnProgress,
154                                           void* pProgressData );
155 
156     virtual GIntBig     GetFeatureCount( int bForce ) override;
157 
158     virtual OGRErr      SetAttributeFilter( const char* pszAttrQuery ) override;
159 
160     virtual OGRErr      GetExtent( OGREnvelope *psExtent, int bForce ) override;
GetExtent(int iGeomField,OGREnvelope * psExtent,int bForce)161     virtual OGRErr      GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) override
162                 { return OGRLayer::GetExtent(iGeomField, psExtent, bForce); }
163 
164     const OGREnvelope*  GetSpatialFilterEnvelope();
165 
166     int                 AddFeature(OGRFeature* poFeature,
167                                    int bAttrFilterAlreadyEvaluated,
168                                    int* pbFilteredOut = nullptr,
169                                    int bCheckFeatureThreshold = TRUE);
170     void                ForceResetReading();
171 
172     void                AddField(const char* pszName, OGRFieldType eFieldType);
173     int                 GetFieldIndex(const char* pszName);
174 
HasOSMId()175     bool                HasOSMId() const { return bHasOSMId; }
SetHasOSMId(bool bIn)176     void                SetHasOSMId(bool bIn) { bHasOSMId = bIn; }
177 
HasVersion()178     bool                HasVersion() const { return bHasVersion; }
SetHasVersion(bool bIn)179     void                SetHasVersion(bool bIn) { bHasVersion = bIn; }
180 
HasTimestamp()181     bool                HasTimestamp() const { return bHasTimestamp; }
SetHasTimestamp(bool bIn)182     void                SetHasTimestamp(bool bIn) { bHasTimestamp = bIn; }
183 
HasUID()184     bool                HasUID() const { return bHasUID; }
SetHasUID(bool bIn)185     void                SetHasUID(bool bIn) { bHasUID = bIn; }
186 
HasUser()187     bool                HasUser() const { return bHasUser; }
SetHasUser(bool bIn)188     void                SetHasUser(bool bIn) { bHasUser = bIn; }
189 
HasChangeset()190     bool                HasChangeset() const { return bHasChangeset; }
SetHasChangeset(bool bIn)191     void                SetHasChangeset(bool bIn) { bHasChangeset = bIn; }
192 
HasOtherTags()193     bool                HasOtherTags() const { return bHasOtherTags; }
SetHasOtherTags(bool bIn)194     void                SetHasOtherTags(bool bIn) { bHasOtherTags = bIn; }
195 
HasAllTags()196     bool                HasAllTags() const { return bHasAllTags; }
SetHasAllTags(bool bIn)197     void                SetHasAllTags(bool bIn) { bHasAllTags = bIn; }
198 
199     void                SetFieldsFromTags(OGRFeature* poFeature,
200                                           GIntBig nID,
201                                           bool bIsWayID,
202                                           unsigned int nTags, OSMTag* pasTags,
203                                           OSMInfo* psInfo);
204 
SetDeclareInterest(bool bIn)205     void                SetDeclareInterest(bool bIn) { bUserInterested = bIn; }
IsUserInterested()206     bool                IsUserInterested() const { return bUserInterested; }
207 
HasAttributeFilter()208     int                 HasAttributeFilter() const { return m_poAttrQuery != nullptr; }
209     int                 EvaluateAttributeFilter(OGRFeature* poFeature);
210 
211     void                AddInsignificantKey(const char* pszK);
IsSignificantKey(const char * pszK)212     int                 IsSignificantKey(const char* pszK) const
213         { return aoSetInsignificantKeys.find(pszK) == aoSetInsignificantKeys.end(); }
214 
215     void                AddIgnoreKey(const char* pszK);
216     void                AddWarnKey(const char* pszK);
217 
218     void                AddComputedAttribute(const char* pszName,
219                                              OGRFieldType eType,
220                                              const char* pszSQL);
221 };
222 
/************************************************************************/
/*                        OGROSMDataSource                              */
/************************************************************************/
226 
227 typedef struct
228 {
229     char* pszK;
230     int nKeyIndex;
231     int nOccurrences;
232     std::vector<char*> asValues;
233     std::map<const char*, int, ConstCharComp> anMapV; /* map that is the reverse of asValues */
234 } KeyDesc;
235 
/**
 * Compact key/value pair in which both the key and the value are stored as
 * indexes/offsets into tables held elsewhere.
 *
 * Kept free of constructors and default member initializers so that it
 * remains a trivial, standard-layout type.
 */
struct IndexedKVP
{
    short               nKeyIndex; /* index of OGROSMDataSource.asKeys */
    short               bVIsIndex; /* whether we should use nValueIndex or nOffsetInpabyNonRedundantValues */
    union
    {
        int                 nValueIndex; /* index of KeyDesc.asValues */
        int                 nOffsetInpabyNonRedundantValues; /* offset in OGROSMDataSource.pabyNonRedundantValues */
    } u;
};
246 
247 typedef struct
248 {
249     GIntBig             nOff;
250     /* Note: only one of nth bucket pabyBitmap or panSectorSize must be free'd */
251     union
252     {
253         GByte          *pabyBitmap;    /* array of BUCKET_BITMAP_SIZE bytes */
254         GByte          *panSectorSize; /* array of BUCKET_SECTOR_SIZE_ARRAY_SIZE bytes. Each values means (size in bytes - 8 ) / 2, minus 8. 252 means uncompressed */
255     } u;
256 } Bucket;
257 
/**
 * Longitude/latitude pair stored as two integers.
 *
 * NOTE(review): the integer encoding (scale factor) is not visible in this
 * header - see the implementation file.
 *
 * Kept free of constructors and default member initializers so that it
 * remains a trivial aggregate.
 */
struct LonLat
{
    int               nLon;
    int               nLat;
};
263 
264 typedef struct
265 {
266     GIntBig             nWayID;
267     GIntBig*            panNodeRefs; /* point to a sub-array of OGROSMDataSource.anReqIds */
268     unsigned int        nRefs;
269     unsigned int        nTags;
270     IndexedKVP*         pasTags; /*  point to a sub-array of OGROSMDataSource.pasAccumulatedTags */
271     OSMInfo             sInfo;
272     OGRFeature         *poFeature;
273     EMULATED_BOOL       bIsArea : 1;
274     EMULATED_BOOL       bAttrFilterAlreadyEvaluated : 1;
275 } WayFeaturePair;
276 
#ifdef ENABLE_NODE_LOOKUP_BY_HASHING
/**
 * Link of a collision chain used by the hashed node lookup.
 *
 * Only declared when ENABLE_NODE_LOOKUP_BY_HASHING is defined.
 */
struct CollisionBucket
{
    int nInd;           /* values are indexes of panReqIds */
    int nNext;          /* values are indexes of psCollisionBuckets, or -1 to stop the chain */
};
#endif
284 
285 class OGROSMDataSource final: public OGRDataSource
286 {
287     friend class OGROSMLayer;
288 
289     int                 nLayers;
290     OGROSMLayer**       papoLayers;
291     char*               pszName;
292 
293     OGREnvelope         sExtent;
294     bool                bExtentValid;
295 
296     // Starts off at -1 to indicate that we do not know.
297     int                 bInterleavedReading;
298     OGROSMLayer        *poCurrentLayer;
299 
300     OSMContext         *psParser;
301     bool                bHasParsedFirstChunk;
302     bool                bStopParsing;
303 
304     sqlite3_vfs*        pMyVFS;
305 
306     sqlite3            *hDB;
307     sqlite3_stmt       *hInsertNodeStmt;
308     sqlite3_stmt       *hInsertWayStmt;
309     sqlite3_stmt       *hSelectNodeBetweenStmt;
310     sqlite3_stmt      **pahSelectNodeStmt;
311     sqlite3_stmt      **pahSelectWayStmt;
312     sqlite3_stmt       *hInsertPolygonsStandaloneStmt;
313     sqlite3_stmt       *hDeletePolygonsStandaloneStmt;
314     sqlite3_stmt       *hSelectPolygonsStandaloneStmt;
315     bool                bHasRowInPolygonsStandalone;
316 
317     sqlite3            *hDBForComputedAttributes;
318 
319     int                 nMaxSizeForInMemoryDBInMB;
320     bool                bInMemoryTmpDB;
321     bool                bMustUnlink;
322     CPLString           osTmpDBName;
323 
324     int                 nNodesInTransaction;
325 
326     std::unordered_set<std::string> aoSetClosedWaysArePolygons;
327     int                 nMinSizeKeysInSetClosedWaysArePolygons;
328     int                 nMaxSizeKeysInSetClosedWaysArePolygons;
329 
330     std::vector<LonLat> m_asLonLatCache{};
331 
332     std::array<const char*, 7>  m_ignoredKeys;
333 
334     bool                bReportAllNodes;
335     bool                bReportAllWays;
336 
337     bool                bFeatureAdded;
338 
339     bool                bInTransaction;
340 
341     bool                bIndexPoints;
342     bool                bUsePointsIndex;
343     bool                bIndexWays;
344     bool                bUseWaysIndex;
345 
346     std::vector<bool>   abSavedDeclaredInterest;
347     OGRLayer*           poResultSetLayer;
348     bool                bIndexPointsBackup;
349     bool                bUsePointsIndexBackup;
350     bool                bIndexWaysBackup;
351     bool                bUseWaysIndexBackup;
352 
353     bool                bIsFeatureCountEnabled;
354 
355     bool                bAttributeNameLaundering;
356 
357     std::vector<GByte>  m_abyWayBuffer{};
358 
359     int                 nWaysProcessed;
360     int                 nRelationsProcessed;
361 
362     bool                bCustomIndexing;
363     bool                bCompressNodes;
364 
365     unsigned int        nUnsortedReqIds;
366     GIntBig            *panUnsortedReqIds;
367 
368     unsigned int        nReqIds;
369     GIntBig            *panReqIds;
370 
371 #ifdef ENABLE_NODE_LOOKUP_BY_HASHING
372     bool                bEnableHashedIndex;
373     /* values >= 0 are indexes of panReqIds. */
374     /*        == -1 for unoccupied */
375     /*        < -1 are expressed as -nIndexToCollisionBuckets-2 where nIndexToCollisionBuckets point to psCollisionBuckets */
376     int                *panHashedIndexes;
377     CollisionBucket    *psCollisionBuckets;
378     bool                bHashedIndexValid;
379 #endif
380 
381     LonLat             *pasLonLatArray;
382 
383     IndexedKVP         *pasAccumulatedTags; /* points to content of pabyNonRedundantValues or aoMapIndexedKeys */
384     int                 nAccumulatedTags;
385     GByte              *pabyNonRedundantValues;
386     int                 nNonRedundantValuesLen;
387     WayFeaturePair     *pasWayFeaturePairs;
388     int                 nWayFeaturePairs;
389 
390     int                          nNextKeyIndex;
391     std::vector<KeyDesc*>         asKeys;
392     std::map<const char*, KeyDesc*, ConstCharComp> aoMapIndexedKeys; /* map that is the reverse of asKeys */
393 
394     CPLString           osNodesFilename;
395     bool                bInMemoryNodesFile;
396     bool                bMustUnlinkNodesFile;
397     GIntBig             nNodesFileSize;
398     VSILFILE           *fpNodes;
399 
400     GIntBig             nPrevNodeId;
401     int                 nBucketOld;
402     int                 nOffInBucketReducedOld;
403     GByte              *pabySector;
404     std::map<int, Bucket> oMapBuckets;
405     Bucket*             GetBucket(int nBucketId);
406 
407     bool                bNeedsToSaveWayInfo;
408 
409     static const GIntBig FILESIZE_NOT_INIT = -2;
410     static const GIntBig FILESIZE_INVALID = -1;
411     GIntBig             m_nFileSize;
412 
413     void                CompressWay (bool bIsArea, unsigned int nTags, IndexedKVP* pasTags,
414                                      int nPoints, LonLat* pasLonLatPairs,
415                                      OSMInfo* psInfo,
416                                      std::vector<GByte> &abyCompressedWay);
417     void                UncompressWay( int nBytes, const GByte* pabyCompressedWay,
418                                        bool *pbIsArea,
419                                        std::vector<LonLat>& asCoords,
420                                        unsigned int* pnTags, OSMTag* pasTags,
421                                        OSMInfo* psInfo );
422 
423     bool                ParseConf(char** papszOpenOptions);
424     bool                CreateTempDB();
425     bool                SetDBOptions();
426     bool                SetCacheSize();
427     bool                CreatePreparedStatements();
428     void                CloseDB();
429 
430     bool                IndexPoint( OSMNode* psNode );
431     bool                IndexPointSQLite( OSMNode* psNode );
432     bool                FlushCurrentSector();
433     bool                FlushCurrentSectorCompressedCase();
434     bool                FlushCurrentSectorNonCompressedCase();
435     bool                IndexPointCustom( OSMNode* psNode );
436 
437     void                IndexWay(GIntBig nWayID, bool bIsArea,
438                                  unsigned int nTags, IndexedKVP* pasTags,
439                                  LonLat* pasLonLatPairs, int nPairs,
440                                  OSMInfo* psInfo);
441 
442     bool                StartTransactionCacheDB();
443     bool                CommitTransactionCacheDB();
444 
445     int                 FindNode(GIntBig nID);
446     void                ProcessWaysBatch();
447 
448     void                ProcessPolygonsStandalone();
449 
450     void                LookupNodes();
451     void                LookupNodesSQLite();
452     void                LookupNodesCustom();
453     void                LookupNodesCustomCompressedCase();
454     void                LookupNodesCustomNonCompressedCase();
455 
456     unsigned int        LookupWays( std::map< GIntBig, std::pair<int,void*> >& aoMapWays,
457                                     OSMRelation* psRelation );
458 
459     OGRGeometry*        BuildMultiPolygon(OSMRelation* psRelation,
460                                           unsigned int* pnTags,
461                                           OSMTag* pasTags);
462     OGRGeometry*        BuildGeometryCollection(OSMRelation* psRelation, int bMultiLineString);
463 
464     bool                TransferToDiskIfNecesserary();
465 
466     Bucket*             AllocBucket(int iBucket);
467 
468     void                AddComputedAttributes(int iCurLayer,
469                                              const std::vector<OGROSMComputedAttribute>& oAttributes);
470     bool                IsClosedWayTaggedAsPolygon( unsigned int nTags, const OSMTag* pasTags );
471 
472   public:
473                         OGROSMDataSource();
474                         virtual ~OGROSMDataSource();
475 
GetName()476     virtual const char *GetName() override { return pszName; }
GetLayerCount()477     virtual int         GetLayerCount() override { return nLayers; }
478     virtual OGRLayer   *GetLayer( int ) override;
479 
480     virtual int         TestCapability( const char * ) override;
481 
482     virtual OGRLayer *  ExecuteSQL( const char *pszSQLCommand,
483                                     OGRGeometry *poSpatialFilter,
484                                     const char *pszDialect ) override;
485     virtual void        ReleaseResultSet( OGRLayer * poLayer ) override;
486 
487     virtual void        ResetReading() override;
488     virtual OGRFeature* GetNextFeature( OGRLayer** ppoBelongingLayer,
489                                         double* pdfProgressPct,
490                                        GDALProgressFunc pfnProgress,
491                                         void* pProgressData ) override;
492 
493     int                 Open ( const char* pszFilename, char** papszOpenOptions );
494 
495     int                 MyResetReading();
496     bool                ParseNextChunk(int nIdxLayer,
497                                        GDALProgressFunc pfnProgress,
498                                        void* pProgressData);
499     OGRErr              GetExtent( OGREnvelope *psExtent );
500     int                 IsInterleavedReading();
501 
502     void                NotifyNodes(unsigned int nNodes, OSMNode* pasNodes);
503     void                NotifyWay (OSMWay* psWay);
504     void                NotifyRelation (OSMRelation* psRelation);
505     void                NotifyBounds (double dfXMin, double dfYMin,
506                                       double dfXMax, double dfYMax);
507 
GetCurrentLayer()508     OGROSMLayer*        GetCurrentLayer() { return poCurrentLayer; }
SetCurrentLayer(OGROSMLayer * poLyr)509     void                SetCurrentLayer(OGROSMLayer* poLyr) { poCurrentLayer = poLyr; }
510 
IsFeatureCountEnabled()511     bool                IsFeatureCountEnabled() const { return bIsFeatureCountEnabled; }
512 
DoesAttributeNameLaundering()513     bool                DoesAttributeNameLaundering() const { return bAttributeNameLaundering; }
514 };
515 
#endif /* ndef OGR_OSM_H_INCLUDED */