1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3###############################################################################
4# $Id: ogrmerge.py 05a98f65afa7e5048cae16a58c2268510cd89103 2021-01-15 16:16:00 +0200 Idan Miara $
5#
6# Project:  GDAL/OGR samples
7# Purpose:  Merge the content of several vector datasets into a single one.
8# Author:   Even Rouault <even dot rouault at spatialys dot com>
9#
10###############################################################################
11# Copyright (c) 2017, Even Rouault <even dot rouault at spatialys dot com>
12#
13# Permission is hereby granted, free of charge, to any person obtaining a
14# copy of this software and associated documentation files (the "Software"),
15# to deal in the Software without restriction, including without limitation
16# the rights to use, copy, modify, merge, publish, distribute, sublicense,
17# and/or sell copies of the Software, and to permit persons to whom the
18# Software is furnished to do so, subject to the following conditions:
19#
20# The above copyright notice and this permission notice shall be included
21# in all copies or substantial portions of the Software.
22#
23# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29# DEALINGS IN THE SOFTWARE.
30###############################################################################
31
32import glob
33import os
34import os.path
35import sys
36
37from osgeo import gdal
38from osgeo import ogr
39from osgeo_utils.auxiliary.util import GetOutputDriverFor
40
41
42def Usage():
43    print('ogrmerge.py -o out_dsname src_dsname [src_dsname]*')
44    print('            [-f format] [-single] [-nln layer_name_template]')
45    print('            [-update | -overwrite_ds] [-append | -overwrite_layer]')
46    print('            [-src_geom_type geom_type_name[,geom_type_name]*]')
47    print('            [-dsco NAME=VALUE]* [-lco NAME=VALUE]*')
48    print('            [-s_srs srs_def] [-t_srs srs_def | -a_srs srs_def]')
49    print('            [-progress] [-skipfailures] [--help-general]')
50    print('')
51    print('Options specific to -single:')
52    print('            [-field_strategy FirstLayer|Union|Intersection]')
53    print('            [-src_layer_field_name name]')
54    print('            [-src_layer_field_content layer_name_template]')
55    print('')
56    print('* layer_name_template can contain the following substituable '
57          'variables:')
58    print('     {AUTO_NAME}  : {DS_BASENAME}_{LAYER_NAME} if they are '
59          'different')
60    print('                    or {LAYER_NAME} if they are identical')
61    print('     {DS_NAME}    : name of the source dataset')
62    print('     {DS_BASENAME}: base name of the source dataset')
63    print('     {DS_INDEX}   : index of the source dataset')
64    print('     {LAYER_NAME} : name of the source layer')
65    print('     {LAYER_INDEX}: index of the source layer')
66
67    return 1
68
69
70#############################################################################
71
72
73def _VSIFPrintfL(f, s):
74    gdal.VSIFWriteL(s, 1, len(s), f)
75
76#############################################################################
77
78
79def EQUAL(x, y):
80    return x.lower() == y.lower()
81
82#############################################################################
83
84
85def _GetGeomType(src_geom_type_name):
86    if EQUAL(src_geom_type_name, "GEOMETRY"):
87        return ogr.wkbUnknown
88    try:
89        max_geom_type = ogr.wkbTriangle
90    except:
91        # GDAL 2.1 compat
92        max_geom_type = ogr.wkbSurface
93    for i in range(max_geom_type + 1):
94        if EQUAL(src_geom_type_name,
95                 ogr.GeometryTypeToName(i).replace(' ', '')):
96            return i
97    return None
98
99#############################################################################
100
101
102def _Esc(x):
103    return gdal.EscapeString(x, gdal.CPLES_XML)
104
105
106class XMLWriter(object):
107
108    def __init__(self, f):
109        self.f = f
110        self.inc = 0
111        self.elements = []
112
113    def _indent(self):
114        return '  ' * self.inc
115
116    def open_element(self, name, attrs=None):
117        xml_attrs = ''
118        if attrs is not None:
119            for key in attrs:
120                xml_attrs = xml_attrs + ' %s=\"%s\"' % (key, _Esc(attrs[key].encode('utf-8')))
121        x = '%s<%s%s>\n' % (self._indent(), name, xml_attrs)
122        x = x.encode('utf-8')
123        _VSIFPrintfL(self.f, x)
124        self.inc = self.inc + 1
125        self.elements.append(name)
126
127    def write_element_value(self, name, value, attrs=None):
128        xml_attrs = ''
129        if attrs is not None:
130            for key in attrs:
131                xml_attrs = xml_attrs + ' %s=\"%s\"' % (key, _Esc(attrs[key].encode('utf-8')))
132        x = '%s<%s%s>%s</%s>\n' % (self._indent(), name, xml_attrs,
133                      _Esc(value.encode('utf-8')), name)
134        x = x.encode('utf-8')
135        _VSIFPrintfL(self.f, x)
136
137    def close_element(self, closing_name=None):
138        self.inc = self.inc - 1
139        name = self.elements[-1]
140        if closing_name is not None:
141            assert name == closing_name
142        self.elements = self.elements[0:-1]
143        _VSIFPrintfL(self.f, '%s</%s>\n' % (self._indent(), name))
144
145
146###############################################################
147# process()
148
149
150def process(argv, progress=None, progress_arg=None):
151
152    if not argv:
153        return Usage()
154
155    dst_filename = None
156    output_format = None
157    src_datasets = []
158    overwrite_ds = False
159    overwrite_layer = False
160    update = False
161    append = False
162    single_layer = False
163    layer_name_template = None
164    skip_failures = False
165    src_geom_types = []
166    field_strategy = None
167    src_layer_field_name = None
168    src_layer_field_content = None
169    a_srs = None
170    s_srs = None
171    t_srs = None
172    dsco = []
173    lco = []
174
175    i = 0
176    while i < len(argv):
177        arg = argv[i]
178        if (arg == '-f' or arg == '-of') and i + 1 < len(argv):
179            i = i + 1
180            output_format = argv[i]
181        elif arg == '-o' and i + 1 < len(argv):
182            i = i + 1
183            dst_filename = argv[i]
184        elif arg == '-progress':
185            progress = ogr.TermProgress_nocb
186            progress_arg = None
187        elif arg == '-q' or arg == '-quiet':
188            pass
189        elif arg[0:5] == '-skip':
190            skip_failures = True
191        elif arg == '-update':
192            update = True
193        elif arg == '-overwrite_ds':
194            overwrite_ds = True
195        elif arg == '-overwrite_layer':
196            overwrite_layer = True
197            update = True
198        elif arg == '-append':
199            append = True
200            update = True
201        elif arg == '-single':
202            single_layer = True
203        elif arg == '-a_srs' and i + 1 < len(argv):
204            i = i + 1
205            a_srs = argv[i]
206        elif arg == '-s_srs' and i + 1 < len(argv):
207            i = i + 1
208            s_srs = argv[i]
209        elif arg == '-t_srs' and i + 1 < len(argv):
210            i = i + 1
211            t_srs = argv[i]
212        elif arg == '-nln' and i + 1 < len(argv):
213            i = i + 1
214            layer_name_template = argv[i]
215        elif arg == '-field_strategy' and i + 1 < len(argv):
216            i = i + 1
217            field_strategy = argv[i]
218        elif arg == '-src_layer_field_name' and i + 1 < len(argv):
219            i = i + 1
220            src_layer_field_name = argv[i]
221        elif arg == '-src_layer_field_content' and i + 1 < len(argv):
222            i = i + 1
223            src_layer_field_content = argv[i]
224        elif arg == '-dsco' and i + 1 < len(argv):
225            i = i + 1
226            dsco.append(argv[i])
227        elif arg == '-lco' and i + 1 < len(argv):
228            i = i + 1
229            lco.append(argv[i])
230        elif arg == '-src_geom_type' and i + 1 < len(argv):
231            i = i + 1
232            src_geom_type_names = argv[i].split(',')
233            for src_geom_type_name in src_geom_type_names:
234                src_geom_type = _GetGeomType(src_geom_type_name)
235                if src_geom_type is None:
236                    print('ERROR: Unrecognized geometry type: %s' %
237                          src_geom_type_name)
238                    return 1
239                src_geom_types.append(src_geom_type)
240        elif arg[0] == '-':
241            print('ERROR: Unrecognized argument : %s' % arg)
242            return Usage()
243        else:
244            if '*' in arg:
245                src_datasets += glob.glob(arg)
246            else:
247                src_datasets.append(arg)
248        i = i + 1
249
250    if dst_filename is None:
251        print('Missing -o')
252        return 1
253
254    if update:
255        if output_format is not None:
256            print('ERROR: -f incompatible with -update')
257            return 1
258        if dsco:
259            print('ERROR: -dsco incompatible with -update')
260            return 1
261        output_format = ''
262    else:
263        if output_format is None:
264            output_format = GetOutputDriverFor(dst_filename, is_raster=False)
265
266    if src_layer_field_content is None:
267        src_layer_field_content = '{AUTO_NAME}'
268    elif src_layer_field_name is None:
269        src_layer_field_name = 'source_ds_lyr'
270
271    if not single_layer and output_format == 'ESRI Shapefile' and \
272       dst_filename.lower().endswith('.shp'):
273        print('ERROR: Non-single layer mode incompatible with non-directory '
274              'shapefile output')
275        return 1
276
277    if not src_datasets:
278        print('ERROR: No source datasets')
279        return 1
280
281    if layer_name_template is None:
282        if single_layer:
283            layer_name_template = 'merged'
284        else:
285            layer_name_template = '{AUTO_NAME}'
286
287    vrt_filename = None
288    if not EQUAL(output_format, 'VRT'):
289        dst_ds = gdal.OpenEx(dst_filename, gdal.OF_VECTOR | gdal.OF_UPDATE)
290        if dst_ds is not None:
291            if not update and not overwrite_ds:
292                print('ERROR: Destination dataset already exists, ' +
293                      'but -update nor -overwrite_ds are specified')
294                return 1
295            if overwrite_ds:
296                drv = dst_ds.GetDriver()
297                dst_ds = None
298                if drv.GetDescription() == 'OGR_VRT':
299                    # We don't want to destroy the sources of the VRT
300                    gdal.Unlink(dst_filename)
301                else:
302                    drv.Delete(dst_filename)
303        elif update:
304            print('ERROR: Destination dataset does not exist')
305            return 1
306        if dst_ds is None:
307            drv = gdal.GetDriverByName(output_format)
308            if drv is None:
309                print('ERROR: Invalid driver: %s' % output_format)
310                return 1
311            dst_ds = drv.Create(
312                dst_filename, 0, 0, 0, gdal.GDT_Unknown, dsco)
313            if dst_ds is None:
314                return 1
315
316        vrt_filename = '/vsimem/_ogrmerge_.vrt'
317    else:
318        if gdal.VSIStatL(dst_filename) and not overwrite_ds:
319            print('ERROR: Destination dataset already exists, ' +
320                  'but -overwrite_ds are specified')
321            return 1
322        vrt_filename = dst_filename
323
324    f = gdal.VSIFOpenL(vrt_filename, 'wb')
325    if f is None:
326        print('ERROR: Cannot create %s' % vrt_filename)
327        return 1
328
329    writer = XMLWriter(f)
330    writer.open_element('OGRVRTDataSource')
331
332    if single_layer:
333
334        ogr_vrt_union_layer_written = False
335
336        for src_ds_idx, src_dsname in enumerate(src_datasets):
337            src_ds = ogr.Open(src_dsname)
338            if src_ds is None:
339                print('ERROR: Cannot open %s' % src_dsname)
340                if skip_failures:
341                    continue
342                gdal.VSIFCloseL(f)
343                gdal.Unlink(vrt_filename)
344                return 1
345            for src_lyr_idx, src_lyr in enumerate(src_ds):
346                if src_geom_types:
347                    gt = ogr.GT_Flatten(src_lyr.GetGeomType())
348                    if gt not in src_geom_types:
349                        continue
350
351                if not ogr_vrt_union_layer_written:
352                    ogr_vrt_union_layer_written = True
353                    writer.open_element('OGRVRTUnionLayer',
354                                        attrs={'name': layer_name_template})
355
356                    if src_layer_field_name is not None:
357                        writer.write_element_value('SourceLayerFieldName',
358                                                   src_layer_field_name)
359
360                    if field_strategy is not None:
361                        writer.write_element_value('FieldStrategy',
362                                                   field_strategy)
363
364                layer_name = src_layer_field_content
365
366                src_lyr_name = src_lyr.GetName()
367                try:
368                    src_lyr_name = src_lyr_name.decode('utf-8')
369                except AttributeError:
370                    pass
371
372                basename = None
373                if os.path.exists(src_dsname):
374                    basename = os.path.basename(src_dsname)
375                    if '.' in basename:
376                        basename = '.'.join(basename.split(".")[0:-1])
377
378                if basename == src_lyr_name:
379                    layer_name = layer_name.replace('{AUTO_NAME}', basename)
380                elif basename is None:
381                    layer_name = layer_name.replace(
382                        '{AUTO_NAME}',
383                        'Dataset%d_%s' % (src_ds_idx, src_lyr_name))
384                else:
385                    layer_name = layer_name.replace(
386                        '{AUTO_NAME}', basename + '_' + src_lyr_name)
387
388                if basename is not None:
389                    layer_name = layer_name.replace('{DS_BASENAME}', basename)
390                else:
391                    layer_name = layer_name.replace('{DS_BASENAME}',
392                                                    src_dsname)
393                layer_name = layer_name.replace('{DS_NAME}', '%s' %
394                                                src_dsname)
395                layer_name = layer_name.replace('{DS_INDEX}', '%d' %
396                                                src_ds_idx)
397                layer_name = layer_name.replace('{LAYER_NAME}',
398                                                src_lyr_name)
399                layer_name = layer_name.replace('{LAYER_INDEX}', '%d' %
400                                                src_lyr_idx)
401
402                if t_srs is not None:
403                    writer.open_element('OGRVRTWarpedLayer')
404
405                writer.open_element('OGRVRTLayer',
406                                    attrs={'name': layer_name})
407                attrs = {}
408                if EQUAL(output_format, 'VRT') and \
409                   os.path.exists(src_dsname) and \
410                   not os.path.isabs(src_dsname) and \
411                   '/' not in vrt_filename and \
412                   '\\' not in vrt_filename:
413                    attrs['relativeToVRT'] = '1'
414                if single_layer:
415                    attrs['shared'] = '1'
416                writer.write_element_value('SrcDataSource', src_dsname,
417                                           attrs=attrs)
418                writer.write_element_value('SrcLayer', src_lyr.GetName())
419
420                if a_srs is not None:
421                    writer.write_element_value('LayerSRS', a_srs)
422
423                writer.close_element('OGRVRTLayer')
424
425                if t_srs is not None:
426                    if s_srs is not None:
427                        writer.write_element_value('SrcSRS', s_srs)
428
429                    writer.write_element_value('TargetSRS', t_srs)
430
431                    writer.close_element('OGRVRTWarpedLayer')
432
433        if ogr_vrt_union_layer_written:
434            writer.close_element('OGRVRTUnionLayer')
435
436    else:
437
438        for src_ds_idx, src_dsname in enumerate(src_datasets):
439            src_ds = ogr.Open(src_dsname)
440            if src_ds is None:
441                print('ERROR: Cannot open %s' % src_dsname)
442                if skip_failures:
443                    continue
444                gdal.VSIFCloseL(f)
445                gdal.Unlink(vrt_filename)
446                return 1
447            for src_lyr_idx, src_lyr in enumerate(src_ds):
448                if src_geom_types:
449                    gt = ogr.GT_Flatten(src_lyr.GetGeomType())
450                    if gt not in src_geom_types:
451                        continue
452
453                src_lyr_name = src_lyr.GetName()
454                try:
455                    src_lyr_name = src_lyr_name.decode('utf-8')
456                except AttributeError:
457                    pass
458
459                layer_name = layer_name_template
460                basename = None
461                if os.path.exists(src_dsname):
462                    basename = os.path.basename(src_dsname)
463                    if '.' in basename:
464                        basename = '.'.join(basename.split(".")[0:-1])
465
466                if basename == src_lyr_name:
467                    layer_name = layer_name.replace('{AUTO_NAME}', basename)
468                elif basename is None:
469                    layer_name = layer_name.replace(
470                        '{AUTO_NAME}',
471                        'Dataset%d_%s' % (src_ds_idx, src_lyr_name))
472                else:
473                    layer_name = layer_name.replace(
474                        '{AUTO_NAME}', basename + '_' + src_lyr_name)
475
476                if basename is not None:
477                    layer_name = layer_name.replace('{DS_BASENAME}', basename)
478                elif '{DS_BASENAME}' in layer_name:
479                    if skip_failures:
480                        if '{DS_INDEX}' not in layer_name:
481                            layer_name = layer_name.replace(
482                                '{DS_BASENAME}', 'Dataset%d' % src_ds_idx)
483                    else:
484                        print('ERROR: Layer name template %s '
485                              'includes {DS_BASENAME} '
486                              'but %s is not a file' %
487                              (layer_name_template, src_dsname))
488
489                        gdal.VSIFCloseL(f)
490                        gdal.Unlink(vrt_filename)
491                        return 1
492                layer_name = layer_name.replace('{DS_NAME}', '%s' %
493                                                src_dsname)
494                layer_name = layer_name.replace('{DS_INDEX}', '%d' %
495                                                src_ds_idx)
496                layer_name = layer_name.replace('{LAYER_NAME}',
497                                                src_lyr_name)
498                layer_name = layer_name.replace('{LAYER_INDEX}', '%d' %
499                                                src_lyr_idx)
500
501                if t_srs is not None:
502                    writer.open_element('OGRVRTWarpedLayer')
503
504                writer.open_element('OGRVRTLayer',
505                                    attrs={'name': layer_name})
506                attrs = {}
507                if EQUAL(output_format, 'VRT') and \
508                   os.path.exists(src_dsname) and \
509                   not os.path.isabs(src_dsname) and \
510                   '/' not in vrt_filename and \
511                   '\\' not in vrt_filename:
512                    attrs['relativeToVRT'] = '1'
513                if single_layer:
514                    attrs['shared'] = '1'
515                writer.write_element_value('SrcDataSource', src_dsname,
516                                           attrs=attrs)
517                writer.write_element_value('SrcLayer', src_lyr_name)
518
519                if a_srs is not None:
520                    writer.write_element_value('LayerSRS', a_srs)
521
522                writer.close_element('OGRVRTLayer')
523
524                if t_srs is not None:
525                    if s_srs is not None:
526                        writer.write_element_value('SrcSRS', s_srs)
527
528                    writer.write_element_value('TargetSRS', t_srs)
529
530                    writer.close_element('OGRVRTWarpedLayer')
531
532    writer.close_element('OGRVRTDataSource')
533
534    gdal.VSIFCloseL(f)
535
536    ret = 0
537    if not EQUAL(output_format, 'VRT'):
538        accessMode = None
539        if append:
540            accessMode = 'append'
541        elif overwrite_layer:
542            accessMode = 'overwrite'
543        ret = gdal.VectorTranslate(dst_ds, vrt_filename,
544                                   accessMode=accessMode,
545                                   layerCreationOptions=lco,
546                                   skipFailures=skip_failures,
547                                   callback=progress,
548                                   callback_data=progress_arg)
549        if ret == 1:
550            ret = 0
551        else:
552            ret = 1
553        gdal.Unlink(vrt_filename)
554
555    return ret
556
557
558def main(argv):
559    argv = ogr.GeneralCmdLineProcessor(argv)
560    if argv is None:
561        return 1
562    return process(argv[1:])
563
564
565if __name__ == '__main__':
566    sys.exit(main(sys.argv))
567