1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3############################################################################### 4# $Id: ogrmerge.py 05a98f65afa7e5048cae16a58c2268510cd89103 2021-01-15 16:16:00 +0200 Idan Miara $ 5# 6# Project: GDAL/OGR samples 7# Purpose: Merge the content of several vector datasets into a single one. 8# Author: Even Rouault <even dot rouault at spatialys dot com> 9# 10############################################################################### 11# Copyright (c) 2017, Even Rouault <even dot rouault at spatialys dot com> 12# 13# Permission is hereby granted, free of charge, to any person obtaining a 14# copy of this software and associated documentation files (the "Software"), 15# to deal in the Software without restriction, including without limitation 16# the rights to use, copy, modify, merge, publish, distribute, sublicense, 17# and/or sell copies of the Software, and to permit persons to whom the 18# Software is furnished to do so, subject to the following conditions: 19# 20# The above copyright notice and this permission notice shall be included 21# in all copies or substantial portions of the Software. 22# 23# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 24# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 26# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 29# DEALINGS IN THE SOFTWARE. 30############################################################################### 31 32import glob 33import os 34import os.path 35import sys 36 37from osgeo import gdal 38from osgeo import ogr 39from osgeo_utils.auxiliary.util import GetOutputDriverFor 40 41 42def Usage(): 43 print('ogrmerge.py -o out_dsname src_dsname [src_dsname]*') 44 print(' [-f format] [-single] [-nln layer_name_template]') 45 print(' [-update | -overwrite_ds] [-append | -overwrite_layer]') 46 print(' [-src_geom_type geom_type_name[,geom_type_name]*]') 47 print(' [-dsco NAME=VALUE]* [-lco NAME=VALUE]*') 48 print(' [-s_srs srs_def] [-t_srs srs_def | -a_srs srs_def]') 49 print(' [-progress] [-skipfailures] [--help-general]') 50 print('') 51 print('Options specific to -single:') 52 print(' [-field_strategy FirstLayer|Union|Intersection]') 53 print(' [-src_layer_field_name name]') 54 print(' [-src_layer_field_content layer_name_template]') 55 print('') 56 print('* layer_name_template can contain the following substituable ' 57 'variables:') 58 print(' {AUTO_NAME} : {DS_BASENAME}_{LAYER_NAME} if they are ' 59 'different') 60 print(' or {LAYER_NAME} if they are identical') 61 print(' {DS_NAME} : name of the source dataset') 62 print(' {DS_BASENAME}: base name of the source dataset') 63 print(' {DS_INDEX} : index of the source dataset') 64 print(' {LAYER_NAME} : name of the source layer') 65 print(' {LAYER_INDEX}: index of the source layer') 66 67 return 1 68 69 70############################################################################# 71 72 73def _VSIFPrintfL(f, s): 74 gdal.VSIFWriteL(s, 1, len(s), f) 75 76############################################################################# 77 78 79def EQUAL(x, y): 80 return x.lower() == y.lower() 81 82############################################################################# 83 84 85def _GetGeomType(src_geom_type_name): 86 if EQUAL(src_geom_type_name, "GEOMETRY"): 87 return ogr.wkbUnknown 88 try: 89 max_geom_type = ogr.wkbTriangle 90 except: 91 # GDAL 2.1 compat 92 max_geom_type = ogr.wkbSurface 93 for i in range(max_geom_type + 1): 94 if EQUAL(src_geom_type_name, 95 ogr.GeometryTypeToName(i).replace(' ', '')): 96 return i 97 return None 98 99############################################################################# 100 101 102def _Esc(x): 103 return gdal.EscapeString(x, gdal.CPLES_XML) 104 105 106class XMLWriter(object): 107 108 def __init__(self, f): 109 self.f = f 110 self.inc = 0 111 self.elements = [] 112 113 def _indent(self): 114 return ' ' * self.inc 115 116 def open_element(self, name, attrs=None): 117 xml_attrs = '' 118 if attrs is not None: 119 for key in attrs: 120 xml_attrs = xml_attrs + ' %s=\"%s\"' % (key, _Esc(attrs[key].encode('utf-8'))) 121 x = '%s<%s%s>\n' % (self._indent(), name, xml_attrs) 122 x = x.encode('utf-8') 123 _VSIFPrintfL(self.f, x) 124 self.inc = self.inc + 1 125 self.elements.append(name) 126 127 def write_element_value(self, name, value, attrs=None): 128 xml_attrs = '' 129 if attrs is not None: 130 for key in attrs: 131 xml_attrs = xml_attrs + ' %s=\"%s\"' % (key, _Esc(attrs[key].encode('utf-8'))) 132 x = '%s<%s%s>%s</%s>\n' % (self._indent(), name, xml_attrs, 133 _Esc(value.encode('utf-8')), name) 134 x = x.encode('utf-8') 135 _VSIFPrintfL(self.f, x) 136 137 def close_element(self, closing_name=None): 138 self.inc = self.inc - 1 139 name = self.elements[-1] 140 if closing_name is not None: 141 assert name == closing_name 142 self.elements = self.elements[0:-1] 143 _VSIFPrintfL(self.f, '%s</%s>\n' % (self._indent(), name)) 144 145 146############################################################### 147# process() 148 149 150def process(argv, progress=None, progress_arg=None): 151 152 if not argv: 153 return Usage() 154 155 dst_filename = None 156 output_format = None 157 src_datasets = [] 158 overwrite_ds = False 159 overwrite_layer = False 160 update = False 161 append = False 162 single_layer = False 163 layer_name_template = None 164 skip_failures = False 165 src_geom_types = [] 166 field_strategy = None 167 src_layer_field_name = None 168 src_layer_field_content = None 169 a_srs = None 170 s_srs = None 171 t_srs = None 172 dsco = [] 173 lco = [] 174 175 i = 0 176 while i < len(argv): 177 arg = argv[i] 178 if (arg == '-f' or arg == '-of') and i + 1 < len(argv): 179 i = i + 1 180 output_format = argv[i] 181 elif arg == '-o' and i + 1 < len(argv): 182 i = i + 1 183 dst_filename = argv[i] 184 elif arg == '-progress': 185 progress = ogr.TermProgress_nocb 186 progress_arg = None 187 elif arg == '-q' or arg == '-quiet': 188 pass 189 elif arg[0:5] == '-skip': 190 skip_failures = True 191 elif arg == '-update': 192 update = True 193 elif arg == '-overwrite_ds': 194 overwrite_ds = True 195 elif arg == '-overwrite_layer': 196 overwrite_layer = True 197 update = True 198 elif arg == '-append': 199 append = True 200 update = True 201 elif arg == '-single': 202 single_layer = True 203 elif arg == '-a_srs' and i + 1 < len(argv): 204 i = i + 1 205 a_srs = argv[i] 206 elif arg == '-s_srs' and i + 1 < len(argv): 207 i = i + 1 208 s_srs = argv[i] 209 elif arg == '-t_srs' and i + 1 < len(argv): 210 i = i + 1 211 t_srs = argv[i] 212 elif arg == '-nln' and i + 1 < len(argv): 213 i = i + 1 214 layer_name_template = argv[i] 215 elif arg == '-field_strategy' and i + 1 < len(argv): 216 i = i + 1 217 field_strategy = argv[i] 218 elif arg == '-src_layer_field_name' and i + 1 < len(argv): 219 i = i + 1 220 src_layer_field_name = argv[i] 221 elif arg == '-src_layer_field_content' and i + 1 < len(argv): 222 i = i + 1 223 src_layer_field_content = argv[i] 224 elif arg == '-dsco' and i + 1 < len(argv): 225 i = i + 1 226 dsco.append(argv[i]) 227 elif arg == '-lco' and i + 1 < len(argv): 228 i = i + 1 229 lco.append(argv[i]) 230 elif arg == '-src_geom_type' and i + 1 < len(argv): 231 i = i + 1 232 src_geom_type_names = argv[i].split(',') 233 for src_geom_type_name in src_geom_type_names: 234 src_geom_type = _GetGeomType(src_geom_type_name) 235 if src_geom_type is None: 236 print('ERROR: Unrecognized geometry type: %s' % 237 src_geom_type_name) 238 return 1 239 src_geom_types.append(src_geom_type) 240 elif arg[0] == '-': 241 print('ERROR: Unrecognized argument : %s' % arg) 242 return Usage() 243 else: 244 if '*' in arg: 245 src_datasets += glob.glob(arg) 246 else: 247 src_datasets.append(arg) 248 i = i + 1 249 250 if dst_filename is None: 251 print('Missing -o') 252 return 1 253 254 if update: 255 if output_format is not None: 256 print('ERROR: -f incompatible with -update') 257 return 1 258 if dsco: 259 print('ERROR: -dsco incompatible with -update') 260 return 1 261 output_format = '' 262 else: 263 if output_format is None: 264 output_format = GetOutputDriverFor(dst_filename, is_raster=False) 265 266 if src_layer_field_content is None: 267 src_layer_field_content = '{AUTO_NAME}' 268 elif src_layer_field_name is None: 269 src_layer_field_name = 'source_ds_lyr' 270 271 if not single_layer and output_format == 'ESRI Shapefile' and \ 272 dst_filename.lower().endswith('.shp'): 273 print('ERROR: Non-single layer mode incompatible with non-directory ' 274 'shapefile output') 275 return 1 276 277 if not src_datasets: 278 print('ERROR: No source datasets') 279 return 1 280 281 if layer_name_template is None: 282 if single_layer: 283 layer_name_template = 'merged' 284 else: 285 layer_name_template = '{AUTO_NAME}' 286 287 vrt_filename = None 288 if not EQUAL(output_format, 'VRT'): 289 dst_ds = gdal.OpenEx(dst_filename, gdal.OF_VECTOR | gdal.OF_UPDATE) 290 if dst_ds is not None: 291 if not update and not overwrite_ds: 292 print('ERROR: Destination dataset already exists, ' + 293 'but -update nor -overwrite_ds are specified') 294 return 1 295 if overwrite_ds: 296 drv = dst_ds.GetDriver() 297 dst_ds = None 298 if drv.GetDescription() == 'OGR_VRT': 299 # We don't want to destroy the sources of the VRT 300 gdal.Unlink(dst_filename) 301 else: 302 drv.Delete(dst_filename) 303 elif update: 304 print('ERROR: Destination dataset does not exist') 305 return 1 306 if dst_ds is None: 307 drv = gdal.GetDriverByName(output_format) 308 if drv is None: 309 print('ERROR: Invalid driver: %s' % output_format) 310 return 1 311 dst_ds = drv.Create( 312 dst_filename, 0, 0, 0, gdal.GDT_Unknown, dsco) 313 if dst_ds is None: 314 return 1 315 316 vrt_filename = '/vsimem/_ogrmerge_.vrt' 317 else: 318 if gdal.VSIStatL(dst_filename) and not overwrite_ds: 319 print('ERROR: Destination dataset already exists, ' + 320 'but -overwrite_ds are specified') 321 return 1 322 vrt_filename = dst_filename 323 324 f = gdal.VSIFOpenL(vrt_filename, 'wb') 325 if f is None: 326 print('ERROR: Cannot create %s' % vrt_filename) 327 return 1 328 329 writer = XMLWriter(f) 330 writer.open_element('OGRVRTDataSource') 331 332 if single_layer: 333 334 ogr_vrt_union_layer_written = False 335 336 for src_ds_idx, src_dsname in enumerate(src_datasets): 337 src_ds = ogr.Open(src_dsname) 338 if src_ds is None: 339 print('ERROR: Cannot open %s' % src_dsname) 340 if skip_failures: 341 continue 342 gdal.VSIFCloseL(f) 343 gdal.Unlink(vrt_filename) 344 return 1 345 for src_lyr_idx, src_lyr in enumerate(src_ds): 346 if src_geom_types: 347 gt = ogr.GT_Flatten(src_lyr.GetGeomType()) 348 if gt not in src_geom_types: 349 continue 350 351 if not ogr_vrt_union_layer_written: 352 ogr_vrt_union_layer_written = True 353 writer.open_element('OGRVRTUnionLayer', 354 attrs={'name': layer_name_template}) 355 356 if src_layer_field_name is not None: 357 writer.write_element_value('SourceLayerFieldName', 358 src_layer_field_name) 359 360 if field_strategy is not None: 361 writer.write_element_value('FieldStrategy', 362 field_strategy) 363 364 layer_name = src_layer_field_content 365 366 src_lyr_name = src_lyr.GetName() 367 try: 368 src_lyr_name = src_lyr_name.decode('utf-8') 369 except AttributeError: 370 pass 371 372 basename = None 373 if os.path.exists(src_dsname): 374 basename = os.path.basename(src_dsname) 375 if '.' in basename: 376 basename = '.'.join(basename.split(".")[0:-1]) 377 378 if basename == src_lyr_name: 379 layer_name = layer_name.replace('{AUTO_NAME}', basename) 380 elif basename is None: 381 layer_name = layer_name.replace( 382 '{AUTO_NAME}', 383 'Dataset%d_%s' % (src_ds_idx, src_lyr_name)) 384 else: 385 layer_name = layer_name.replace( 386 '{AUTO_NAME}', basename + '_' + src_lyr_name) 387 388 if basename is not None: 389 layer_name = layer_name.replace('{DS_BASENAME}', basename) 390 else: 391 layer_name = layer_name.replace('{DS_BASENAME}', 392 src_dsname) 393 layer_name = layer_name.replace('{DS_NAME}', '%s' % 394 src_dsname) 395 layer_name = layer_name.replace('{DS_INDEX}', '%d' % 396 src_ds_idx) 397 layer_name = layer_name.replace('{LAYER_NAME}', 398 src_lyr_name) 399 layer_name = layer_name.replace('{LAYER_INDEX}', '%d' % 400 src_lyr_idx) 401 402 if t_srs is not None: 403 writer.open_element('OGRVRTWarpedLayer') 404 405 writer.open_element('OGRVRTLayer', 406 attrs={'name': layer_name}) 407 attrs = {} 408 if EQUAL(output_format, 'VRT') and \ 409 os.path.exists(src_dsname) and \ 410 not os.path.isabs(src_dsname) and \ 411 '/' not in vrt_filename and \ 412 '\\' not in vrt_filename: 413 attrs['relativeToVRT'] = '1' 414 if single_layer: 415 attrs['shared'] = '1' 416 writer.write_element_value('SrcDataSource', src_dsname, 417 attrs=attrs) 418 writer.write_element_value('SrcLayer', src_lyr.GetName()) 419 420 if a_srs is not None: 421 writer.write_element_value('LayerSRS', a_srs) 422 423 writer.close_element('OGRVRTLayer') 424 425 if t_srs is not None: 426 if s_srs is not None: 427 writer.write_element_value('SrcSRS', s_srs) 428 429 writer.write_element_value('TargetSRS', t_srs) 430 431 writer.close_element('OGRVRTWarpedLayer') 432 433 if ogr_vrt_union_layer_written: 434 writer.close_element('OGRVRTUnionLayer') 435 436 else: 437 438 for src_ds_idx, src_dsname in enumerate(src_datasets): 439 src_ds = ogr.Open(src_dsname) 440 if src_ds is None: 441 print('ERROR: Cannot open %s' % src_dsname) 442 if skip_failures: 443 continue 444 gdal.VSIFCloseL(f) 445 gdal.Unlink(vrt_filename) 446 return 1 447 for src_lyr_idx, src_lyr in enumerate(src_ds): 448 if src_geom_types: 449 gt = ogr.GT_Flatten(src_lyr.GetGeomType()) 450 if gt not in src_geom_types: 451 continue 452 453 src_lyr_name = src_lyr.GetName() 454 try: 455 src_lyr_name = src_lyr_name.decode('utf-8') 456 except AttributeError: 457 pass 458 459 layer_name = layer_name_template 460 basename = None 461 if os.path.exists(src_dsname): 462 basename = os.path.basename(src_dsname) 463 if '.' in basename: 464 basename = '.'.join(basename.split(".")[0:-1]) 465 466 if basename == src_lyr_name: 467 layer_name = layer_name.replace('{AUTO_NAME}', basename) 468 elif basename is None: 469 layer_name = layer_name.replace( 470 '{AUTO_NAME}', 471 'Dataset%d_%s' % (src_ds_idx, src_lyr_name)) 472 else: 473 layer_name = layer_name.replace( 474 '{AUTO_NAME}', basename + '_' + src_lyr_name) 475 476 if basename is not None: 477 layer_name = layer_name.replace('{DS_BASENAME}', basename) 478 elif '{DS_BASENAME}' in layer_name: 479 if skip_failures: 480 if '{DS_INDEX}' not in layer_name: 481 layer_name = layer_name.replace( 482 '{DS_BASENAME}', 'Dataset%d' % src_ds_idx) 483 else: 484 print('ERROR: Layer name template %s ' 485 'includes {DS_BASENAME} ' 486 'but %s is not a file' % 487 (layer_name_template, src_dsname)) 488 489 gdal.VSIFCloseL(f) 490 gdal.Unlink(vrt_filename) 491 return 1 492 layer_name = layer_name.replace('{DS_NAME}', '%s' % 493 src_dsname) 494 layer_name = layer_name.replace('{DS_INDEX}', '%d' % 495 src_ds_idx) 496 layer_name = layer_name.replace('{LAYER_NAME}', 497 src_lyr_name) 498 layer_name = layer_name.replace('{LAYER_INDEX}', '%d' % 499 src_lyr_idx) 500 501 if t_srs is not None: 502 writer.open_element('OGRVRTWarpedLayer') 503 504 writer.open_element('OGRVRTLayer', 505 attrs={'name': layer_name}) 506 attrs = {} 507 if EQUAL(output_format, 'VRT') and \ 508 os.path.exists(src_dsname) and \ 509 not os.path.isabs(src_dsname) and \ 510 '/' not in vrt_filename and \ 511 '\\' not in vrt_filename: 512 attrs['relativeToVRT'] = '1' 513 if single_layer: 514 attrs['shared'] = '1' 515 writer.write_element_value('SrcDataSource', src_dsname, 516 attrs=attrs) 517 writer.write_element_value('SrcLayer', src_lyr_name) 518 519 if a_srs is not None: 520 writer.write_element_value('LayerSRS', a_srs) 521 522 writer.close_element('OGRVRTLayer') 523 524 if t_srs is not None: 525 if s_srs is not None: 526 writer.write_element_value('SrcSRS', s_srs) 527 528 writer.write_element_value('TargetSRS', t_srs) 529 530 writer.close_element('OGRVRTWarpedLayer') 531 532 writer.close_element('OGRVRTDataSource') 533 534 gdal.VSIFCloseL(f) 535 536 ret = 0 537 if not EQUAL(output_format, 'VRT'): 538 accessMode = None 539 if append: 540 accessMode = 'append' 541 elif overwrite_layer: 542 accessMode = 'overwrite' 543 ret = gdal.VectorTranslate(dst_ds, vrt_filename, 544 accessMode=accessMode, 545 layerCreationOptions=lco, 546 skipFailures=skip_failures, 547 callback=progress, 548 callback_data=progress_arg) 549 if ret == 1: 550 ret = 0 551 else: 552 ret = 1 553 gdal.Unlink(vrt_filename) 554 555 return ret 556 557 558def main(argv): 559 argv = ogr.GeneralCmdLineProcessor(argv) 560 if argv is None: 561 return 1 562 return process(argv[1:]) 563 564 565if __name__ == '__main__': 566 sys.exit(main(sys.argv)) 567