1#!/usr/bin/env python3 2############################################################################### 3# $Id: gdal2xyz.py 2be8649aadb3f869590306cd5a18a3387a650581 2021-04-23 12:08:46 +0300 Idan Miara $ 4# 5# Project: GDAL 6# Purpose: Script to translate GDAL supported raster into XYZ ASCII 7# point stream. 8# Author: Frank Warmerdam, warmerdam@pobox.com 9# 10############################################################################### 11# Copyright (c) 2002, Frank Warmerdam <warmerdam@pobox.com> 12# Copyright (c) 2020-2021, Idan Miara <idan@miara.com> 13# 14# Permission is hereby granted, free of charge, to any person obtaining a 15# copy of this software and associated documentation files (the "Software"), 16# to deal in the Software without restriction, including without limitation 17# the rights to use, copy, modify, merge, publish, distribute, sublicense, 18# and/or sell copies of the Software, and to permit persons to whom the 19# Software is furnished to do so, subject to the following conditions: 20# 21# The above copyright notice and this permission notice shall be included 22# in all copies or substantial portions of the Software. 23# 24# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 25# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 27# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 29# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 30# DEALINGS IN THE SOFTWARE. 31############################################################################### 32import sys 33import textwrap 34from argparse import RawDescriptionHelpFormatter 35from numbers import Number, Real 36from typing import Optional, Union, Sequence, Tuple 37import numpy as np 38 39from osgeo import gdal 40from osgeo_utils.auxiliary.base import PathLikeOrStr 41from osgeo_utils.auxiliary.progress import get_progress_callback, OptionalProgressCallback 42from osgeo_utils.auxiliary.util import PathOrDS, get_bands, open_ds 43from osgeo_utils.auxiliary.numpy_util import GDALTypeCodeAndNumericTypeCodeFromDataSet 44from osgeo_utils.auxiliary.gdal_argparse import GDALArgumentParser 45 46 47def gdal2xyz(srcfile: PathOrDS, dstfile: PathLikeOrStr = None, 48 srcwin: Optional[Sequence[int]] = None, 49 skip: Union[int, Sequence[int]] = 1, 50 band_nums: Optional[Sequence[int]] = None, delim: str = ' ', 51 skip_nodata: bool = False, 52 src_nodata: Optional[Union[Sequence, Number]] = None, dst_nodata: Optional[Union[Sequence, Number]] = None, 53 return_np_arrays: bool = False, pre_allocate_np_arrays: bool = True, 54 progress_callback: OptionalProgressCallback = ...) -> Optional[Tuple]: 55 """ 56 translates a raster file (or dataset) into xyz format 57 58 skip - how many rows/cols to skip each iteration 59 srcwin (xoff, yoff, xsize, ysize) - Selects a subwindow from the source image for copying based on pixel/line location. 60 band_nums - selected input bands to process, None to process all. 61 delim - the delimiter to use between values in a line 62 skip_nodata - Exclude the output lines with nodata value (as determined by srcnodata) 63 src_nodata - The nodata value of the dataset (for skipping or replacing) 64 default (`None`) - Use the dataset NoDataValue; 65 `Sequence`/`Number` - use the given nodata value (per band or per dataset). 66 dst_nodata - Replace source nodata with a given nodata. Has an effect only if not setting `-skipnodata` 67 default(`None`) - use srcnodata, no replacement; 68 `Sequence`/`Number` - replace the `srcnodata` with the given nodata value (per band or per dataset). 69 srcfile - The source dataset filename or dataset object 70 dstfile - The output dataset filename; for dstfile=None - if return_np_arrays=False then output will be printed to stdout 71 return_np_arrays - return numpy arrays of the result, otherwise returns None 72 pre_allocate_np_arrays - pre-allocated result arrays. 73 Should be faster unless skip_nodata and the input is very sparse thus most data points will be skipped. 74 progress_callback - progress callback function. use None for quiet or Ellipsis for using the default callback 75 """ 76 77 result = None 78 79 progress_callback = get_progress_callback(progress_callback) 80 81 # Open source file. 82 ds = open_ds(srcfile, access_mode=gdal.GA_ReadOnly) 83 if ds is None: 84 raise Exception(f'Could not open {srcfile}.') 85 86 bands = get_bands(ds, band_nums) 87 band_count = len(bands) 88 89 gt = ds.GetGeoTransform() 90 91 # Collect information on all the source files. 92 if srcwin is None: 93 srcwin = (0, 0, ds.RasterXSize, ds.RasterYSize) 94 95 dt, np_dt = GDALTypeCodeAndNumericTypeCodeFromDataSet(ds) 96 97 # Open the output file. 98 if dstfile is not None: 99 dst_fh = open(dstfile, 'wt') 100 elif return_np_arrays: 101 dst_fh = None 102 else: 103 dst_fh = sys.stdout 104 105 if dst_fh: 106 if dt == gdal.GDT_Int32 or dt == gdal.GDT_UInt32: 107 band_format = (("%d" + delim) * len(bands)).rstrip(delim) + '\n' 108 else: 109 band_format = (("%g" + delim) * len(bands)).rstrip(delim) + '\n' 110 111 # Setup an appropriate print format. 112 if abs(gt[0]) < 180 and abs(gt[3]) < 180 \ 113 and abs(ds.RasterXSize * gt[1]) < 180 \ 114 and abs(ds.RasterYSize * gt[5]) < 180: 115 frmt = '%.10g' + delim + '%.10g' + delim + '%s' 116 else: 117 frmt = '%.3f' + delim + '%.3f' + delim + '%s' 118 119 if isinstance(src_nodata, Number): 120 src_nodata = [src_nodata] * band_count 121 elif src_nodata is None: 122 src_nodata = list(band.GetNoDataValue() for band in bands) 123 if None in src_nodata: 124 src_nodata = None 125 if src_nodata is not None: 126 src_nodata = np.asarray(src_nodata, dtype=np_dt) 127 128 if isinstance(dst_nodata, Number): 129 dst_nodata = [dst_nodata] * band_count 130 if (dst_nodata is None) or (None in dst_nodata) or (src_nodata is None): 131 dst_nodata = None 132 if dst_nodata is not None: 133 dst_nodata = np.asarray(dst_nodata, dtype=np_dt) 134 135 skip_nodata = skip_nodata and (src_nodata is not None) 136 replace_nodata = (not skip_nodata) and (dst_nodata is not None) 137 process_nodata = skip_nodata or replace_nodata 138 139 if isinstance(skip, Sequence): 140 x_skip, y_skip = skip 141 else: 142 x_skip = y_skip = skip 143 144 x_off, y_off, x_size, y_size = srcwin 145 bands_count = len(bands) 146 147 nXBlocks = (x_size - x_off) // x_skip 148 nYBlocks = (y_size - y_off) // y_skip 149 progress_end = nXBlocks * nYBlocks 150 progress_curr = 0 151 progress_prev = -1 152 progress_parts = 100 153 154 if return_np_arrays: 155 size = progress_end if pre_allocate_np_arrays else 0 156 all_geo_x = np.empty(size) 157 all_geo_y = np.empty(size) 158 all_data = np.empty((size, band_count), dtype=np_dt) 159 160 # Loop emitting data. 161 idx = 0 162 for y in range(y_off, y_off + y_size, y_skip): 163 164 size = bands_count if pre_allocate_np_arrays else 0 165 data = np.empty((size, x_size), dtype=np_dt) # dims: (bands_count, x_size) 166 for i_bnd, band in enumerate(bands): 167 band_data = band.ReadAsArray(x_off, y, x_size, 1) # read one band line 168 if pre_allocate_np_arrays: 169 data[i_bnd] = band_data[0] 170 else: 171 data = np.append(data, band_data, axis=0) 172 173 for x_i in range(0, x_size, x_skip): 174 175 progress_curr += 1 176 if progress_callback: 177 progress_frac = progress_curr / progress_end 178 progress = int(progress_frac * progress_parts) 179 if progress > progress_prev: 180 progress_prev = progress 181 progress_callback(progress_frac) 182 183 x_i_data = data[:, x_i] # single pixel, dims: (bands) 184 if process_nodata and np.array_equal(src_nodata, x_i_data): 185 if skip_nodata: 186 continue 187 elif replace_nodata: 188 x_i_data = dst_nodata 189 190 x = x_i + x_off 191 192 geo_x = gt[0] + (x + 0.5) * gt[1] + (y + 0.5) * gt[2] 193 geo_y = gt[3] + (x + 0.5) * gt[4] + (y + 0.5) * gt[5] 194 195 if dst_fh: 196 band_str = band_format % tuple(x_i_data) 197 line = frmt % (float(geo_x), float(geo_y), band_str) 198 dst_fh.write(line) 199 if return_np_arrays: 200 if pre_allocate_np_arrays: 201 all_geo_x[idx] = geo_x 202 all_geo_y[idx] = geo_y 203 all_data[idx] = x_i_data 204 else: 205 all_geo_x = np.append(all_geo_x, geo_x) 206 all_geo_y = np.append(all_geo_y, geo_y) 207 all_data = np.append(all_data, [x_i_data], axis=0) 208 idx += 1 209 210 if return_np_arrays: 211 nodata = None if skip_nodata else dst_nodata if replace_nodata else src_nodata 212 if idx != progress_curr: 213 all_geo_x = all_geo_x[:idx] 214 all_geo_y = all_geo_y[:idx] 215 all_data = all_data[:idx, :] 216 result = all_geo_x, all_geo_y, all_data.transpose(), nodata 217 218 return result 219 220 221def main(argv): 222 parser = GDALArgumentParser( 223 formatter_class=RawDescriptionHelpFormatter, 224 description=textwrap.dedent('''\ 225 The gdal2xyz utility can be used to translate a raster file into xyz format. 226 It can be used as an alternative to gdal_translate of=xyz, 227 But supporting other options, for example: 228 * Select more then one band; 229 * Skip or replace nodata value; 230 * Return the output as numpy arrays.''')) 231 232 parser.add_argument("-skip", dest="skip", action="store_true", default=1, 233 help="How many rows/cols to skip in each iteration.") 234 235 parser.add_argument("-srcwin", metavar=('xoff', 'yoff', 'xsize', 'ysize'), dest="srcwin", type=float, nargs=4, 236 help="Selects a subwindow from the source image for copying based on pixel/line location") 237 238 parser.add_argument("-b", "-band", "--band", dest="band_nums", metavar="band", type=int, nargs='+', 239 help="Select bands from the input spectral bands for output. " 240 "Bands are numbered from 1 in the order spectral bands are specified. " 241 "Multiple -b switches may be used. When no -b switch is used, the first band will be used." 242 "In order to use all input bands set -allbands or -b 0..") 243 244 parser.add_argument("-allbands", "--allbands", dest="allbands", action="store_true", 245 help="Select all input bands.") 246 247 parser.add_argument("-csv", dest="delim", const=',', default=' ', action="store_const", 248 help="Use comma instead of space as a delimiter.") 249 250 parser.add_argument("-skipnodata", "--skipnodata", "-skip_nodata", dest="skip_nodata", action="store_true", 251 help="Exclude the output lines with nodata value (as determined by srcnodata).") 252 253 parser.add_argument("-srcnodata", '-nodatavalue', dest="src_nodata", type=Real, nargs='*', 254 help="The nodata value of the dataset (for skipping or replacing) " 255 "Default (None) - Use the dataset nodata value; " 256 "Sequence/Number - Use the given nodata value (per band or per dataset).") 257 258 parser.add_argument("-dstnodata", dest="dst_nodata", type=Real, nargs='*', 259 help="Replace source nodata with a given nodata. " 260 "Has an effect only if not setting -skipnodata. " 261 "Default(None) - Use srcnodata, no replacement; " 262 "Sequence/Number - Replace the srcnodata with the given nodata value " 263 "(per band or per dataset).") 264 265 parser.add_argument("srcfile", metavar="src_dataset", type=str, 266 help="The source dataset name. It can be either file name, " 267 "URL of data source or subdataset name for multi-dataset files.") 268 269 parser.add_argument("dstfile", metavar="dst_dataset", type=str, 270 help="The destination file name.") 271 272 args = parser.parse_args(argv[1:]) 273 if args.allbands: 274 args.band_nums = None 275 elif not args.band_nums: 276 args.band_nums = 1 277 kwargs = vars(args) 278 del kwargs["allbands"] 279 try: 280 return gdal2xyz(**kwargs) 281 except IOError as e: 282 print(e) 283 return 1 284 285 286if __name__ == '__main__': 287 sys.exit(main(sys.argv)) 288