1 /****************************************************************************** 2 * $Id: ogr_osm.h 09a48d5214b089c224b3b7afed5beee254d45614 2021-08-15 12:04:53 +0200 Even Rouault $ 3 * 4 * Project: OpenGIS Simple Features Reference Implementation 5 * Purpose: Private definitions for OGR/OpenStreeMap driver. 6 * Author: Even Rouault, <even dot rouault at spatialys.com> 7 * 8 ****************************************************************************** 9 * Copyright (c) 2012-2014, Even Rouault <even dot rouault at spatialys.com> 10 * 11 * Permission is hereby granted, free of charge, to any person obtaining a 12 * copy of this software and associated documentation files (the "Software"), 13 * to deal in the Software without restriction, including without limitation 14 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 15 * and/or sell copies of the Software, and to permit persons to whom the 16 * Software is furnished to do so, subject to the following conditions: 17 * 18 * The above copyright notice and this permission notice shall be included 19 * in all copies or substantial portions of the Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 27 * DEALINGS IN THE SOFTWARE. 28 ****************************************************************************/ 29 30 #ifndef OGR_OSM_H_INCLUDED 31 #define OGR_OSM_H_INCLUDED 32 33 // replace O(log2(N)) complexity of FindNode() by O(1) 34 #define ENABLE_NODE_LOOKUP_BY_HASHING 1 35 36 #include "ogrsf_frmts.h" 37 #include "cpl_string.h" 38 39 #include <array> 40 #include <set> 41 #include <unordered_set> 42 #include <map> 43 #include <vector> 44 45 #include "osm_parser.h" 46 47 #include "ogrsqlitevfs.h" 48 49 class ConstCharComp 50 { 51 public: operator()52 bool operator()(const char* a, const char* b) const 53 { 54 return strcmp(a, b) < 0; 55 } 56 }; 57 58 class OGROSMComputedAttribute 59 { 60 public: 61 CPLString osName; 62 int nIndex; 63 OGRFieldType eType; 64 CPLString osSQL; 65 sqlite3_stmt *hStmt; 66 std::vector<CPLString> aosAttrToBind; 67 std::vector<int> anIndexToBind; 68 bool bHardcodedZOrder; 69 OGROSMComputedAttribute()70 OGROSMComputedAttribute() : nIndex(-1), eType(OFTString), hStmt(nullptr), bHardcodedZOrder(false) {} OGROSMComputedAttribute(const char * pszName)71 explicit OGROSMComputedAttribute(const char* pszName) : 72 osName(pszName), nIndex(-1), eType(OFTString), hStmt(nullptr), bHardcodedZOrder(false) {} 73 }; 74 75 /************************************************************************/ 76 /* OGROSMLayer */ 77 /************************************************************************/ 78 79 class OGROSMDataSource; 80 81 class OGROSMLayer final: public OGRLayer 82 { 83 friend class OGROSMDataSource; 84 85 OGROSMDataSource *poDS; 86 int nIdxLayer; 87 OGRFeatureDefn *poFeatureDefn; 88 OGRSpatialReference *poSRS; 89 long nFeatureCount; 90 91 std::vector<char*> apszNames; /* Needed to keep a "reference" to the string inserted into oMapFieldNameToIndex */ 92 std::map<const char*, int, ConstCharComp> oMapFieldNameToIndex; 93 94 std::vector<OGROSMComputedAttribute> oComputedAttributes; 95 96 bool bResetReadingAllowed; 97 98 int nFeatureArraySize; 99 int nFeatureArrayMaxSize; 100 int nFeatureArrayIndex; 101 OGRFeature** papoFeatures; 102 103 bool bHasOSMId; 104 int nIndexOSMId; 105 int nIndexOSMWayId; 106 bool bHasVersion; 107 bool bHasTimestamp; 108 bool bHasUID; 109 bool bHasUser; 110 bool bHasChangeset; 111 bool bHasOtherTags; 112 int nIndexOtherTags; 113 bool bHasAllTags; 114 int nIndexAllTags; 115 116 bool bHasWarnedTooManyFeatures; 117 118 char *pszAllTags; 119 bool bHasWarnedAllTagsTruncated; 120 121 bool bUserInterested; 122 123 bool AddToArray( OGRFeature* poFeature, 124 int bCheckFeatureThreshold ); 125 126 int AddInOtherOrAllTags(const char* pszK); 127 128 char szLaunderedFieldName[256]; 129 const char* GetLaunderedFieldName(const char* pszName); 130 131 std::vector<char*> apszInsignificantKeys; 132 std::map<const char*, int, ConstCharComp> aoSetInsignificantKeys; 133 134 std::vector<char*> apszIgnoreKeys; 135 std::map<const char*, int, ConstCharComp> aoSetIgnoreKeys; 136 137 std::set<std::string> aoSetWarnKeys; 138 139 public: 140 OGROSMLayer( OGROSMDataSource* poDS, 141 int nIdxLayer, 142 const char* pszName ); 143 virtual ~OGROSMLayer(); 144 GetLayerDefn()145 virtual OGRFeatureDefn *GetLayerDefn() override {return poFeatureDefn;} 146 147 virtual void ResetReading() override; 148 virtual int TestCapability( const char * ) override; 149 150 virtual OGRFeature *GetNextFeature() override; 151 152 OGRFeature* MyGetNextFeature( OGROSMLayer** ppoNewCurLayer, 153 GDALProgressFunc pfnProgress, 154 void* pProgressData ); 155 156 virtual GIntBig GetFeatureCount( int bForce ) override; 157 158 virtual OGRErr SetAttributeFilter( const char* pszAttrQuery ) override; 159 160 virtual OGRErr GetExtent( OGREnvelope *psExtent, int bForce ) override; GetExtent(int iGeomField,OGREnvelope * psExtent,int bForce)161 virtual OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) override 162 { return OGRLayer::GetExtent(iGeomField, psExtent, bForce); } 163 164 const OGREnvelope* GetSpatialFilterEnvelope(); 165 166 int AddFeature(OGRFeature* poFeature, 167 int bAttrFilterAlreadyEvaluated, 168 int* pbFilteredOut = nullptr, 169 int bCheckFeatureThreshold = TRUE); 170 void ForceResetReading(); 171 172 void AddField(const char* pszName, OGRFieldType eFieldType); 173 int GetFieldIndex(const char* pszName); 174 HasOSMId()175 bool HasOSMId() const { return bHasOSMId; } SetHasOSMId(bool bIn)176 void SetHasOSMId(bool bIn) { bHasOSMId = bIn; } 177 HasVersion()178 bool HasVersion() const { return bHasVersion; } SetHasVersion(bool bIn)179 void SetHasVersion(bool bIn) { bHasVersion = bIn; } 180 HasTimestamp()181 bool HasTimestamp() const { return bHasTimestamp; } SetHasTimestamp(bool bIn)182 void SetHasTimestamp(bool bIn) { bHasTimestamp = bIn; } 183 HasUID()184 bool HasUID() const { return bHasUID; } SetHasUID(bool bIn)185 void SetHasUID(bool bIn) { bHasUID = bIn; } 186 HasUser()187 bool HasUser() const { return bHasUser; } SetHasUser(bool bIn)188 void SetHasUser(bool bIn) { bHasUser = bIn; } 189 HasChangeset()190 bool HasChangeset() const { return bHasChangeset; } SetHasChangeset(bool bIn)191 void SetHasChangeset(bool bIn) { bHasChangeset = bIn; } 192 HasOtherTags()193 bool HasOtherTags() const { return bHasOtherTags; } SetHasOtherTags(bool bIn)194 void SetHasOtherTags(bool bIn) { bHasOtherTags = bIn; } 195 HasAllTags()196 bool HasAllTags() const { return bHasAllTags; } SetHasAllTags(bool bIn)197 void SetHasAllTags(bool bIn) { bHasAllTags = bIn; } 198 199 void SetFieldsFromTags(OGRFeature* poFeature, 200 GIntBig nID, 201 bool bIsWayID, 202 unsigned int nTags, OSMTag* pasTags, 203 OSMInfo* psInfo); 204 SetDeclareInterest(bool bIn)205 void SetDeclareInterest(bool bIn) { bUserInterested = bIn; } IsUserInterested()206 bool IsUserInterested() const { return bUserInterested; } 207 HasAttributeFilter()208 int HasAttributeFilter() const { return m_poAttrQuery != nullptr; } 209 int EvaluateAttributeFilter(OGRFeature* poFeature); 210 211 void AddInsignificantKey(const char* pszK); IsSignificantKey(const char * pszK)212 int IsSignificantKey(const char* pszK) const 213 { return aoSetInsignificantKeys.find(pszK) == aoSetInsignificantKeys.end(); } 214 215 void AddIgnoreKey(const char* pszK); 216 void AddWarnKey(const char* pszK); 217 218 void AddComputedAttribute(const char* pszName, 219 OGRFieldType eType, 220 const char* pszSQL); 221 }; 222 223 /************************************************************************/ 224 /* OGROSMDataSource */ 225 /************************************************************************/ 226 227 typedef struct 228 { 229 char* pszK; 230 int nKeyIndex; 231 int nOccurrences; 232 std::vector<char*> asValues; 233 std::map<const char*, int, ConstCharComp> anMapV; /* map that is the reverse of asValues */ 234 } KeyDesc; 235 236 typedef struct 237 { 238 short nKeyIndex; /* index of OGROSMDataSource.asKeys */ 239 short bVIsIndex; /* whether we should use nValueIndex or nOffsetInpabyNonRedundantValues */ 240 union 241 { 242 int nValueIndex; /* index of KeyDesc.asValues */ 243 int nOffsetInpabyNonRedundantValues; /* offset in OGROSMDataSource.pabyNonRedundantValues */ 244 } u; 245 } IndexedKVP; 246 247 typedef struct 248 { 249 GIntBig nOff; 250 /* Note: only one of nth bucket pabyBitmap or panSectorSize must be free'd */ 251 union 252 { 253 GByte *pabyBitmap; /* array of BUCKET_BITMAP_SIZE bytes */ 254 GByte *panSectorSize; /* array of BUCKET_SECTOR_SIZE_ARRAY_SIZE bytes. Each values means (size in bytes - 8 ) / 2, minus 8. 252 means uncompressed */ 255 } u; 256 } Bucket; 257 258 typedef struct 259 { 260 int nLon; 261 int nLat; 262 } LonLat; 263 264 typedef struct 265 { 266 GIntBig nWayID; 267 GIntBig* panNodeRefs; /* point to a sub-array of OGROSMDataSource.anReqIds */ 268 unsigned int nRefs; 269 unsigned int nTags; 270 IndexedKVP* pasTags; /* point to a sub-array of OGROSMDataSource.pasAccumulatedTags */ 271 OSMInfo sInfo; 272 OGRFeature *poFeature; 273 EMULATED_BOOL bIsArea : 1; 274 EMULATED_BOOL bAttrFilterAlreadyEvaluated : 1; 275 } WayFeaturePair; 276 277 #ifdef ENABLE_NODE_LOOKUP_BY_HASHING 278 typedef struct 279 { 280 int nInd; /* values are indexes of panReqIds */ 281 int nNext; /* values are indexes of psCollisionBuckets, or -1 to stop the chain */ 282 } CollisionBucket; 283 #endif 284 285 class OGROSMDataSource final: public OGRDataSource 286 { 287 friend class OGROSMLayer; 288 289 int nLayers; 290 OGROSMLayer** papoLayers; 291 char* pszName; 292 293 OGREnvelope sExtent; 294 bool bExtentValid; 295 296 // Starts off at -1 to indicate that we do not know. 297 int bInterleavedReading; 298 OGROSMLayer *poCurrentLayer; 299 300 OSMContext *psParser; 301 bool bHasParsedFirstChunk; 302 bool bStopParsing; 303 304 sqlite3_vfs* pMyVFS; 305 306 sqlite3 *hDB; 307 sqlite3_stmt *hInsertNodeStmt; 308 sqlite3_stmt *hInsertWayStmt; 309 sqlite3_stmt *hSelectNodeBetweenStmt; 310 sqlite3_stmt **pahSelectNodeStmt; 311 sqlite3_stmt **pahSelectWayStmt; 312 sqlite3_stmt *hInsertPolygonsStandaloneStmt; 313 sqlite3_stmt *hDeletePolygonsStandaloneStmt; 314 sqlite3_stmt *hSelectPolygonsStandaloneStmt; 315 bool bHasRowInPolygonsStandalone; 316 317 sqlite3 *hDBForComputedAttributes; 318 319 int nMaxSizeForInMemoryDBInMB; 320 bool bInMemoryTmpDB; 321 bool bMustUnlink; 322 CPLString osTmpDBName; 323 324 int nNodesInTransaction; 325 326 std::unordered_set<std::string> aoSetClosedWaysArePolygons; 327 int nMinSizeKeysInSetClosedWaysArePolygons; 328 int nMaxSizeKeysInSetClosedWaysArePolygons; 329 330 std::vector<LonLat> m_asLonLatCache{}; 331 332 std::array<const char*, 7> m_ignoredKeys; 333 334 bool bReportAllNodes; 335 bool bReportAllWays; 336 337 bool bFeatureAdded; 338 339 bool bInTransaction; 340 341 bool bIndexPoints; 342 bool bUsePointsIndex; 343 bool bIndexWays; 344 bool bUseWaysIndex; 345 346 std::vector<bool> abSavedDeclaredInterest; 347 OGRLayer* poResultSetLayer; 348 bool bIndexPointsBackup; 349 bool bUsePointsIndexBackup; 350 bool bIndexWaysBackup; 351 bool bUseWaysIndexBackup; 352 353 bool bIsFeatureCountEnabled; 354 355 bool bAttributeNameLaundering; 356 357 std::vector<GByte> m_abyWayBuffer{}; 358 359 int nWaysProcessed; 360 int nRelationsProcessed; 361 362 bool bCustomIndexing; 363 bool bCompressNodes; 364 365 unsigned int nUnsortedReqIds; 366 GIntBig *panUnsortedReqIds; 367 368 unsigned int nReqIds; 369 GIntBig *panReqIds; 370 371 #ifdef ENABLE_NODE_LOOKUP_BY_HASHING 372 bool bEnableHashedIndex; 373 /* values >= 0 are indexes of panReqIds. */ 374 /* == -1 for unoccupied */ 375 /* < -1 are expressed as -nIndexToCollisionBuckets-2 where nIndexToCollisionBuckets point to psCollisionBuckets */ 376 int *panHashedIndexes; 377 CollisionBucket *psCollisionBuckets; 378 bool bHashedIndexValid; 379 #endif 380 381 LonLat *pasLonLatArray; 382 383 IndexedKVP *pasAccumulatedTags; /* points to content of pabyNonRedundantValues or aoMapIndexedKeys */ 384 int nAccumulatedTags; 385 GByte *pabyNonRedundantValues; 386 int nNonRedundantValuesLen; 387 WayFeaturePair *pasWayFeaturePairs; 388 int nWayFeaturePairs; 389 390 int nNextKeyIndex; 391 std::vector<KeyDesc*> asKeys; 392 std::map<const char*, KeyDesc*, ConstCharComp> aoMapIndexedKeys; /* map that is the reverse of asKeys */ 393 394 CPLString osNodesFilename; 395 bool bInMemoryNodesFile; 396 bool bMustUnlinkNodesFile; 397 GIntBig nNodesFileSize; 398 VSILFILE *fpNodes; 399 400 GIntBig nPrevNodeId; 401 int nBucketOld; 402 int nOffInBucketReducedOld; 403 GByte *pabySector; 404 std::map<int, Bucket> oMapBuckets; 405 Bucket* GetBucket(int nBucketId); 406 407 bool bNeedsToSaveWayInfo; 408 409 static const GIntBig FILESIZE_NOT_INIT = -2; 410 static const GIntBig FILESIZE_INVALID = -1; 411 GIntBig m_nFileSize; 412 413 void CompressWay (bool bIsArea, unsigned int nTags, IndexedKVP* pasTags, 414 int nPoints, LonLat* pasLonLatPairs, 415 OSMInfo* psInfo, 416 std::vector<GByte> &abyCompressedWay); 417 void UncompressWay( int nBytes, const GByte* pabyCompressedWay, 418 bool *pbIsArea, 419 std::vector<LonLat>& asCoords, 420 unsigned int* pnTags, OSMTag* pasTags, 421 OSMInfo* psInfo ); 422 423 bool ParseConf(char** papszOpenOptions); 424 bool CreateTempDB(); 425 bool SetDBOptions(); 426 bool SetCacheSize(); 427 bool CreatePreparedStatements(); 428 void CloseDB(); 429 430 bool IndexPoint( OSMNode* psNode ); 431 bool IndexPointSQLite( OSMNode* psNode ); 432 bool FlushCurrentSector(); 433 bool FlushCurrentSectorCompressedCase(); 434 bool FlushCurrentSectorNonCompressedCase(); 435 bool IndexPointCustom( OSMNode* psNode ); 436 437 void IndexWay(GIntBig nWayID, bool bIsArea, 438 unsigned int nTags, IndexedKVP* pasTags, 439 LonLat* pasLonLatPairs, int nPairs, 440 OSMInfo* psInfo); 441 442 bool StartTransactionCacheDB(); 443 bool CommitTransactionCacheDB(); 444 445 int FindNode(GIntBig nID); 446 void ProcessWaysBatch(); 447 448 void ProcessPolygonsStandalone(); 449 450 void LookupNodes(); 451 void LookupNodesSQLite(); 452 void LookupNodesCustom(); 453 void LookupNodesCustomCompressedCase(); 454 void LookupNodesCustomNonCompressedCase(); 455 456 unsigned int LookupWays( std::map< GIntBig, std::pair<int,void*> >& aoMapWays, 457 OSMRelation* psRelation ); 458 459 OGRGeometry* BuildMultiPolygon(OSMRelation* psRelation, 460 unsigned int* pnTags, 461 OSMTag* pasTags); 462 OGRGeometry* BuildGeometryCollection(OSMRelation* psRelation, int bMultiLineString); 463 464 bool TransferToDiskIfNecesserary(); 465 466 Bucket* AllocBucket(int iBucket); 467 468 void AddComputedAttributes(int iCurLayer, 469 const std::vector<OGROSMComputedAttribute>& oAttributes); 470 bool IsClosedWayTaggedAsPolygon( unsigned int nTags, const OSMTag* pasTags ); 471 472 public: 473 OGROSMDataSource(); 474 virtual ~OGROSMDataSource(); 475 GetName()476 virtual const char *GetName() override { return pszName; } GetLayerCount()477 virtual int GetLayerCount() override { return nLayers; } 478 virtual OGRLayer *GetLayer( int ) override; 479 480 virtual int TestCapability( const char * ) override; 481 482 virtual OGRLayer * ExecuteSQL( const char *pszSQLCommand, 483 OGRGeometry *poSpatialFilter, 484 const char *pszDialect ) override; 485 virtual void ReleaseResultSet( OGRLayer * poLayer ) override; 486 487 virtual void ResetReading() override; 488 virtual OGRFeature* GetNextFeature( OGRLayer** ppoBelongingLayer, 489 double* pdfProgressPct, 490 GDALProgressFunc pfnProgress, 491 void* pProgressData ) override; 492 493 int Open ( const char* pszFilename, char** papszOpenOptions ); 494 495 int MyResetReading(); 496 bool ParseNextChunk(int nIdxLayer, 497 GDALProgressFunc pfnProgress, 498 void* pProgressData); 499 OGRErr GetExtent( OGREnvelope *psExtent ); 500 int IsInterleavedReading(); 501 502 void NotifyNodes(unsigned int nNodes, OSMNode* pasNodes); 503 void NotifyWay (OSMWay* psWay); 504 void NotifyRelation (OSMRelation* psRelation); 505 void NotifyBounds (double dfXMin, double dfYMin, 506 double dfXMax, double dfYMax); 507 GetCurrentLayer()508 OGROSMLayer* GetCurrentLayer() { return poCurrentLayer; } SetCurrentLayer(OGROSMLayer * poLyr)509 void SetCurrentLayer(OGROSMLayer* poLyr) { poCurrentLayer = poLyr; } 510 IsFeatureCountEnabled()511 bool IsFeatureCountEnabled() const { return bIsFeatureCountEnabled; } 512 DoesAttributeNameLaundering()513 bool DoesAttributeNameLaundering() const { return bAttributeNameLaundering; } 514 }; 515 516 #endif /* ndef OGR_OSM_H_INCLUDED */ 517