1#!/usr/bin/env python3
2###############################################################################
3# $Id: gdal2xyz.py 2be8649aadb3f869590306cd5a18a3387a650581 2021-04-23 12:08:46 +0300 Idan Miara $
4#
5# Project:  GDAL
6# Purpose:  Script to translate GDAL supported raster into XYZ ASCII
7#           point stream.
8# Author:   Frank Warmerdam, warmerdam@pobox.com
9#
10###############################################################################
11# Copyright (c) 2002, Frank Warmerdam <warmerdam@pobox.com>
12# Copyright (c) 2020-2021, Idan Miara <idan@miara.com>
13#
14# Permission is hereby granted, free of charge, to any person obtaining a
15# copy of this software and associated documentation files (the "Software"),
16# to deal in the Software without restriction, including without limitation
17# the rights to use, copy, modify, merge, publish, distribute, sublicense,
18# and/or sell copies of the Software, and to permit persons to whom the
19# Software is furnished to do so, subject to the following conditions:
20#
21# The above copyright notice and this permission notice shall be included
22# in all copies or substantial portions of the Software.
23#
24# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30# DEALINGS IN THE SOFTWARE.
31###############################################################################
32import sys
33import textwrap
34from argparse import RawDescriptionHelpFormatter
35from numbers import Number, Real
36from typing import Optional, Union, Sequence, Tuple
37import numpy as np
38
39from osgeo import gdal
40from osgeo_utils.auxiliary.base import PathLikeOrStr
41from osgeo_utils.auxiliary.progress import get_progress_callback, OptionalProgressCallback
42from osgeo_utils.auxiliary.util import PathOrDS, get_bands, open_ds
43from osgeo_utils.auxiliary.numpy_util import GDALTypeCodeAndNumericTypeCodeFromDataSet
44from osgeo_utils.auxiliary.gdal_argparse import GDALArgumentParser
45
46
47def gdal2xyz(srcfile: PathOrDS, dstfile: PathLikeOrStr = None,
48             srcwin: Optional[Sequence[int]] = None,
49             skip: Union[int, Sequence[int]] = 1,
50             band_nums: Optional[Sequence[int]] = None, delim: str = ' ',
51             skip_nodata: bool = False,
52             src_nodata: Optional[Union[Sequence, Number]] = None, dst_nodata: Optional[Union[Sequence, Number]] = None,
53             return_np_arrays: bool = False, pre_allocate_np_arrays: bool = True,
54             progress_callback: OptionalProgressCallback = ...) -> Optional[Tuple]:
55    """
56    translates a raster file (or dataset) into xyz format
57
58    skip - how many rows/cols to skip each iteration
59    srcwin (xoff, yoff, xsize, ysize) - Selects a subwindow from the source image for copying based on pixel/line location.
60    band_nums - selected input bands to process, None to process all.
61    delim - the delimiter to use between values in a line
62    skip_nodata - Exclude the output lines with nodata value (as determined by srcnodata)
63    src_nodata - The nodata value of the dataset (for skipping or replacing)
64        default (`None`) - Use the dataset NoDataValue;
65        `Sequence`/`Number` - use the given nodata value (per band or per dataset).
66    dst_nodata - Replace source nodata with a given nodata. Has an effect only if not setting `-skipnodata`
67        default(`None`) - use srcnodata, no replacement;
68        `Sequence`/`Number` - replace the `srcnodata` with the given nodata value (per band or per dataset).
69    srcfile - The source dataset filename or dataset object
70    dstfile - The output dataset filename; for dstfile=None - if return_np_arrays=False then output will be printed to stdout
71    return_np_arrays - return numpy arrays of the result, otherwise returns None
72    pre_allocate_np_arrays - pre-allocated result arrays.
73        Should be faster unless skip_nodata and the input is very sparse thus most data points will be skipped.
74    progress_callback - progress callback function. use None for quiet or Ellipsis for using the default callback
75    """
76
77    result = None
78
79    progress_callback = get_progress_callback(progress_callback)
80
81    # Open source file.
82    ds = open_ds(srcfile, access_mode=gdal.GA_ReadOnly)
83    if ds is None:
84        raise Exception(f'Could not open {srcfile}.')
85
86    bands = get_bands(ds, band_nums)
87    band_count = len(bands)
88
89    gt = ds.GetGeoTransform()
90
91    # Collect information on all the source files.
92    if srcwin is None:
93        srcwin = (0, 0, ds.RasterXSize, ds.RasterYSize)
94
95    dt, np_dt = GDALTypeCodeAndNumericTypeCodeFromDataSet(ds)
96
97    # Open the output file.
98    if dstfile is not None:
99        dst_fh = open(dstfile, 'wt')
100    elif return_np_arrays:
101        dst_fh = None
102    else:
103        dst_fh = sys.stdout
104
105    if dst_fh:
106        if dt == gdal.GDT_Int32 or dt == gdal.GDT_UInt32:
107            band_format = (("%d" + delim) * len(bands)).rstrip(delim) + '\n'
108        else:
109            band_format = (("%g" + delim) * len(bands)).rstrip(delim) + '\n'
110
111        # Setup an appropriate print format.
112        if abs(gt[0]) < 180 and abs(gt[3]) < 180 \
113            and abs(ds.RasterXSize * gt[1]) < 180 \
114            and abs(ds.RasterYSize * gt[5]) < 180:
115            frmt = '%.10g' + delim + '%.10g' + delim + '%s'
116        else:
117            frmt = '%.3f' + delim + '%.3f' + delim + '%s'
118
119    if isinstance(src_nodata, Number):
120        src_nodata = [src_nodata] * band_count
121    elif src_nodata is None:
122        src_nodata = list(band.GetNoDataValue() for band in bands)
123    if None in src_nodata:
124        src_nodata = None
125    if src_nodata is not None:
126        src_nodata = np.asarray(src_nodata, dtype=np_dt)
127
128    if isinstance(dst_nodata, Number):
129        dst_nodata = [dst_nodata] * band_count
130    if (dst_nodata is None) or (None in dst_nodata) or (src_nodata is None):
131        dst_nodata = None
132    if dst_nodata is not None:
133        dst_nodata = np.asarray(dst_nodata, dtype=np_dt)
134
135    skip_nodata = skip_nodata and (src_nodata is not None)
136    replace_nodata = (not skip_nodata) and (dst_nodata is not None)
137    process_nodata = skip_nodata or replace_nodata
138
139    if isinstance(skip, Sequence):
140        x_skip, y_skip = skip
141    else:
142        x_skip = y_skip = skip
143
144    x_off, y_off, x_size, y_size = srcwin
145    bands_count = len(bands)
146
147    nXBlocks = (x_size - x_off) // x_skip
148    nYBlocks = (y_size - y_off) // y_skip
149    progress_end = nXBlocks * nYBlocks
150    progress_curr = 0
151    progress_prev = -1
152    progress_parts = 100
153
154    if return_np_arrays:
155        size = progress_end if pre_allocate_np_arrays else 0
156        all_geo_x = np.empty(size)
157        all_geo_y = np.empty(size)
158        all_data = np.empty((size, band_count), dtype=np_dt)
159
160    # Loop emitting data.
161    idx = 0
162    for y in range(y_off, y_off + y_size, y_skip):
163
164        size = bands_count if pre_allocate_np_arrays else 0
165        data = np.empty((size, x_size), dtype=np_dt)  # dims: (bands_count, x_size)
166        for i_bnd, band in enumerate(bands):
167            band_data = band.ReadAsArray(x_off, y, x_size, 1)  # read one band line
168            if pre_allocate_np_arrays:
169                data[i_bnd] = band_data[0]
170            else:
171                data = np.append(data, band_data, axis=0)
172
173        for x_i in range(0, x_size, x_skip):
174
175            progress_curr += 1
176            if progress_callback:
177                progress_frac = progress_curr / progress_end
178                progress = int(progress_frac * progress_parts)
179                if progress > progress_prev:
180                    progress_prev = progress
181                    progress_callback(progress_frac)
182
183            x_i_data = data[:, x_i]  # single pixel, dims: (bands)
184            if process_nodata and np.array_equal(src_nodata, x_i_data):
185                if skip_nodata:
186                    continue
187                elif replace_nodata:
188                    x_i_data = dst_nodata
189
190            x = x_i + x_off
191
192            geo_x = gt[0] + (x + 0.5) * gt[1] + (y + 0.5) * gt[2]
193            geo_y = gt[3] + (x + 0.5) * gt[4] + (y + 0.5) * gt[5]
194
195            if dst_fh:
196                band_str = band_format % tuple(x_i_data)
197                line = frmt % (float(geo_x), float(geo_y), band_str)
198                dst_fh.write(line)
199            if return_np_arrays:
200                if pre_allocate_np_arrays:
201                    all_geo_x[idx] = geo_x
202                    all_geo_y[idx] = geo_y
203                    all_data[idx] = x_i_data
204                else:
205                    all_geo_x = np.append(all_geo_x, geo_x)
206                    all_geo_y = np.append(all_geo_y, geo_y)
207                    all_data = np.append(all_data, [x_i_data], axis=0)
208            idx += 1
209
210    if return_np_arrays:
211        nodata = None if skip_nodata else dst_nodata if replace_nodata else src_nodata
212        if idx != progress_curr:
213            all_geo_x = all_geo_x[:idx]
214            all_geo_y = all_geo_y[:idx]
215            all_data = all_data[:idx, :]
216        result = all_geo_x, all_geo_y, all_data.transpose(), nodata
217
218    return result
219
220
221def main(argv):
222    parser = GDALArgumentParser(
223        formatter_class=RawDescriptionHelpFormatter,
224        description=textwrap.dedent('''\
225            The gdal2xyz utility can be used to translate a raster file into xyz format.
226            It can be used as an alternative to gdal_translate of=xyz,
227            But supporting other options, for example:
228            * Select more then one band;
229            * Skip or replace nodata value;
230            * Return the output as numpy arrays.'''))
231
232    parser.add_argument("-skip", dest="skip", action="store_true", default=1,
233                        help="How many rows/cols to skip in each iteration.")
234
235    parser.add_argument("-srcwin", metavar=('xoff', 'yoff', 'xsize', 'ysize'), dest="srcwin", type=float, nargs=4,
236                        help="Selects a subwindow from the source image for copying based on pixel/line location")
237
238    parser.add_argument("-b", "-band", "--band", dest="band_nums", metavar="band", type=int, nargs='+',
239                        help="Select bands from the input spectral bands for output. "
240                             "Bands are numbered from 1 in the order spectral bands are specified. "
241                             "Multiple -b switches may be used. When no -b switch is used, the first band will be used."
242                             "In order to use all input bands set -allbands or -b 0..")
243
244    parser.add_argument("-allbands", "--allbands", dest="allbands", action="store_true",
245                        help="Select all input bands.")
246
247    parser.add_argument("-csv", dest="delim", const=',', default=' ', action="store_const",
248                        help="Use comma instead of space as a delimiter.")
249
250    parser.add_argument("-skipnodata", "--skipnodata", "-skip_nodata", dest="skip_nodata", action="store_true",
251                        help="Exclude the output lines with nodata value (as determined by srcnodata).")
252
253    parser.add_argument("-srcnodata", '-nodatavalue', dest="src_nodata", type=Real, nargs='*',
254                        help="The nodata value of the dataset (for skipping or replacing) "
255                             "Default (None) - Use the dataset nodata value; "
256                             "Sequence/Number - Use the given nodata value (per band or per dataset).")
257
258    parser.add_argument("-dstnodata", dest="dst_nodata", type=Real, nargs='*',
259                        help="Replace source nodata with a given nodata. "
260                             "Has an effect only if not setting -skipnodata. "
261                             "Default(None) - Use srcnodata, no replacement; "
262                             "Sequence/Number - Replace the srcnodata with the given nodata value "
263                             "(per band or per dataset).")
264
265    parser.add_argument("srcfile", metavar="src_dataset", type=str,
266                        help="The source dataset name. It can be either file name, "
267                             "URL of data source or subdataset name for multi-dataset files.")
268
269    parser.add_argument("dstfile", metavar="dst_dataset", type=str,
270                        help="The destination file name.")
271
272    args = parser.parse_args(argv[1:])
273    if args.allbands:
274        args.band_nums = None
275    elif not args.band_nums:
276        args.band_nums = 1
277    kwargs = vars(args)
278    del kwargs["allbands"]
279    try:
280        return gdal2xyz(**kwargs)
281    except IOError as e:
282        print(e)
283        return 1
284
285
286if __name__ == '__main__':
287    sys.exit(main(sys.argv))
288