1# ================================================================= 2# 3# Authors: Tom Kralidis <tomkralidis@gmail.com> 4# 5# Copyright (c) 2020 Tom Kralidis 6# 7# Permission is hereby granted, free of charge, to any person 8# obtaining a copy of this software and associated documentation 9# files (the "Software"), to deal in the Software without 10# restriction, including without limitation the rights to use, 11# copy, modify, merge, publish, distribute, sublicense, and/or sell 12# copies of the Software, and to permit persons to whom the 13# Software is furnished to do so, subject to the following 14# conditions: 15# 16# The above copyright notice and this permission notice shall be 17# included in all copies or substantial portions of the Software. 18# 19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 21# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 23# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 26# OTHER DEALINGS IN THE SOFTWARE. 
#
# =================================================================

from datetime import datetime, timezone
import io
from json import loads
import logging
import os
import posixpath
from urllib.parse import urljoin

from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError,
                                    ProviderNotFoundError)
from pygeoapi.util import file_modified_iso8601, get_path_basename

LOGGER = logging.getLogger(__name__)


class FileSystemProvider(BaseProvider):
    """filesystem Provider"""

    def __init__(self, provider_def):
        """
        Initialize object

        :param provider_def: provider definition

        :raises ProviderConnectionError: if the configured data path
                                         does not exist

        :returns: pygeoapi.provider.filesystem.FileSystemProvider
        """

        super().__init__(provider_def)

        if not os.path.exists(self.data):
            msg = 'Directory does not exist: {}'.format(self.data)
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

    def _resolve_data_path(self, dirpath):
        """
        Maps a URL-derived directory path onto the provider data directory

        :param dirpath: directory basepath (equivalent of URL)

        :raises ProviderNotFoundError: if the resulting path would fall
                                       outside of the data directory

        :returns: `str` of filesystem path (`self.data` + `dirpath`)
        """

        # plain concatenation on purpose: os.path.join would discard
        # self.data whenever dirpath starts with a slash
        data_path = self.data + dirpath

        # dirpath originates from the request URL, so refuse anything
        # (e.g. '..' segments) that normalizes to a location outside of
        # the data directory
        root = os.path.abspath(self.data)
        target = os.path.abspath(data_path)
        if target != root and not target.startswith(os.path.join(root, '')):
            msg = 'Resource does not exist: {}'.format(data_path)
            LOGGER.error(msg)
            raise ProviderNotFoundError(msg)

        return data_path

    def get_data_path(self, baseurl, urlpath, dirpath):
        """
        Gets directory listing or file description or raw file dump

        :param baseurl: base URL of endpoint
        :param urlpath: base path of URL
        :param dirpath: directory basepath (equivalent of URL)

        :raises ProviderNotFoundError: if no file or directory matches

        :returns: `dict` of file listing or `dict` of GeoJSON item or raw file
        """

        # URLs always use forward slashes; os.path.join would emit
        # backslashes on Windows
        thispath = posixpath.join(baseurl, urlpath)

        resource_type = None
        root_link = None
        child_links = []

        data_path = self._resolve_data_path(dirpath)

        if '/' not in dirpath:  # root
            root_link = baseurl
        else:
            parentpath = urljoin(thispath, '.')
            child_links.append({
                'rel': 'parent',
                'href': '{}?f=json'.format(parentpath),
                'type': 'application/json'
            })
            child_links.append({
                'rel': 'parent',
                'href': parentpath,
                'type': 'text/html'
            })

            # climb one level per slash found in dirpath
            depth = dirpath.count('/')
            root_link = urljoin(thispath, '../' * depth)

        content = {
            'links': [{
                'rel': 'root',
                'href': '{}?f=json'.format(root_link),
                'type': 'application/json'
                }, {
                'rel': 'root',
                'href': root_link,
                'type': 'text/html'
                }, {
                'rel': 'self',
                'href': '{}?f=json'.format(thispath),
                'type': 'application/json',
                }, {
                'rel': 'self',
                'href': thispath,
                'type': 'text/html'
                }
            ]
        }

        LOGGER.debug('Checking if path exists as raw file or directory')
        # the isfile test makes a missing "*.ext" path end up as
        # ProviderNotFoundError below instead of an uncaught
        # FileNotFoundError when opening it
        if (data_path.endswith(tuple(self.file_types)) and
                os.path.isfile(data_path)):
            resource_type = 'raw_file'
        elif os.path.exists(data_path):
            resource_type = 'directory'
        else:
            LOGGER.debug('Checking if path exists as file via file_types')
            for ft in self.file_types:
                tmp_path = '{}{}'.format(data_path, ft)
                if os.path.exists(tmp_path):
                    resource_type = 'file'
                    data_path = tmp_path
                    break

        if resource_type is None:
            msg = 'Resource does not exist: {}'.format(data_path)
            LOGGER.error(msg)
            raise ProviderNotFoundError(msg)

        if resource_type == 'raw_file':
            with io.open(data_path, 'rb') as fh:
                return fh.read()

        elif resource_type == 'directory':
            content['type'] = 'Catalog'
            for dc in sorted(os.listdir(data_path)):
                # TODO: handle a generic directory for tiles
                if dc == "tiles":
                    continue

                fullpath = os.path.join(data_path, dc)

                # stat calls are made only for entries that get listed,
                # so that e.g. a broken symlink (neither dir nor file)
                # does not break the whole listing
                if os.path.isdir(fullpath):
                    # only the text/html link is emitted for child
                    # directories
                    child_links.append({
                        'rel': 'child',
                        'href': posixpath.join(baseurl, urlpath, dc),
                        'type': 'text/html',
                        'created': file_modified_iso8601(fullpath),
                    })
                elif os.path.isfile(fullpath):
                    basename, extension = os.path.splitext(dc)
                    if extension not in self.file_types:
                        continue

                    # items are linked without their file extension
                    newpath = posixpath.join(baseurl, urlpath, basename)
                    newpath2 = '{}{}'.format(newpath, extension)
                    child_links.append({
                        'rel': 'item',
                        'href': newpath,
                        'title': get_path_basename(newpath2),
                        'created': file_modified_iso8601(fullpath),
                        'file:size': os.path.getsize(fullpath)
                    })

        elif resource_type == 'file':
            filename = os.path.basename(data_path)

            id_, extension = os.path.splitext(filename)
            if urlpath:
                # urlpath already ends with the item id, only the
                # extension needs to be appended to it
                filename = extension
            url = '{}/{}{}'.format(baseurl, urlpath, filename)

            filectime = file_modified_iso8601(data_path)
            filesize = os.path.getsize(data_path)

            # an item replaces the catalog skeleton built above; only
            # the parent links (child_links) are carried over
            content = {
                'id': id_,
                'type': 'Feature',
                'properties': {},
                'links': [],
                'assets': {}
            }

            content.update(_describe_file(data_path))

            content['assets']['default'] = {
                'href': url,
                'created': filectime,
                'file:size': filesize
            }

        content['links'].extend(child_links)

        return content

    def __repr__(self):
        return '<FileSystemProvider> {}'.format(self.data)


def _bbox_to_polygon(bnds):
    """
    Helper function to build a GeoJSON polygon from a bounding box

    :param bnds: sequence of minx, miny, maxx, maxy

    :returns: `dict` of GeoJSON Polygon geometry (closed ring)
    """

    minx, miny, maxx, maxy = bnds[0], bnds[1], bnds[2], bnds[3]

    return {
        'type': 'Polygon',
        'coordinates': [[
            [minx, miny],
            [minx, maxy],
            [maxx, maxy],
            [maxx, miny],
            [minx, miny]
        ]]
    }


def _describe_file(filepath):
    """
    Helper function to describe a geospatial data
    First checks if a sidecar mcf file is available, if so uses that
    if not, script will parse the file to retrieve some info from the file

    pygeometa, rasterio and fiona are imported lazily; if rasterio or
    fiona is missing the description gathered so far is returned

    :param filepath: path to file

    :returns: `dict` of GeoJSON item
    """

    content = {
        'bbox': None,
        'geometry': None,
        'properties': {}
    }

    mcf_file = '{}.yml'.format(os.path.splitext(filepath)[0])

    if os.path.isfile(mcf_file):
        try:
            from pygeometa.core import read_mcf, MCFReadError
            from pygeometa.schemas.stac import STACItemOutputSchema

            md = read_mcf(mcf_file)
            stacjson = STACItemOutputSchema.write(STACItemOutputSchema, md)
            stacdata = loads(stacjson)
            for k, v in stacdata.items():
                content[k] = v
        except ImportError:
            LOGGER.debug('pygeometa not found')
        except MCFReadError as err:
            LOGGER.warning('MCF error: {}'.format(err))
    else:
        LOGGER.debug('No mcf found at: {}'.format(mcf_file))

    # inspect the data itself only if the sidecar did not give an extent
    if content['geometry'] is None and content['bbox'] is None:
        try:
            import rasterio
            from rasterio.crs import CRS
            from rasterio.warp import transform_bounds
        except ImportError as err:
            LOGGER.warning('rasterio not found')
            LOGGER.warning(err)
            return content

        try:
            import fiona
        except ImportError as err:
            LOGGER.warning('fiona not found')
            LOGGER.warning(err)
            return content

        try:  # raster
            LOGGER.debug('Testing raster data detection')
            # context manager makes sure the dataset handle is closed
            with rasterio.open(filepath) as d:
                scrs = CRS(d.crs)
                if scrs.to_epsg() not in [None, 4326]:
                    tcrs = CRS.from_epsg(4326)
                    bnds = transform_bounds(scrs, tcrs,
                                            d.bounds[0], d.bounds[1],
                                            d.bounds[2], d.bounds[3])
                    content['properties']['projection'] = scrs.to_epsg()
                else:
                    bnds = [d.bounds.left, d.bounds.bottom,
                            d.bounds.right, d.bounds.top]
                content['bbox'] = list(bnds)
                content['geometry'] = _bbox_to_polygon(bnds)
                # tags are read from band number d.count
                for k, v in d.tags(d.count).items():
                    content['properties'][k] = v
                    if k == 'GRIB_REF_TIME':
                        try:
                            value = int(v.split()[0])
                        except (IndexError, ValueError):
                            LOGGER.warning(
                                'Invalid GRIB_REF_TIME: {}'.format(v))
                            continue
                        # the value is emitted with a 'Z' (UTC) suffix,
                        # so it has to be converted as UTC and not as
                        # server local time
                        datetime_ = datetime.fromtimestamp(
                            value, tz=timezone.utc).replace(tzinfo=None)
                        content['properties']['datetime'] = datetime_.isoformat() + 'Z'  # noqa
        except rasterio.errors.RasterioIOError:
            try:
                LOGGER.debug('Testing vector data detection')
                # context manager makes sure the collection is closed
                with fiona.open(filepath) as d:
                    scrs = CRS(d.crs)
                    if scrs.to_epsg() not in [None, 4326]:
                        tcrs = CRS.from_epsg(4326)
                        bnds = transform_bounds(scrs, tcrs,
                                                d.bounds[0], d.bounds[1],
                                                d.bounds[2], d.bounds[3])
                        content['properties']['projection'] = scrs.to_epsg()
                    else:
                        bnds = d.bounds

                    # no extent for layers without a geometry column
                    if d.schema['geometry'] not in [None, 'None']:
                        content['bbox'] = [
                            bnds[0],
                            bnds[1],
                            bnds[2],
                            bnds[3]
                        ]
                        content['geometry'] = _bbox_to_polygon(bnds)

                    for k, v in d.schema['properties'].items():
                        content['properties'][k] = v

                    if d.driver == 'ESRI Shapefile':
                        # expose the sidecar files found next to the
                        # main file as additional assets
                        stem = os.path.splitext(filepath)[0]
                        id_ = os.path.basename(stem)
                        content['assets'] = {}
                        for suffix in ['shx', 'dbf', 'prj']:
                            fullpath = '{}.{}'.format(stem, suffix)

                            if os.path.exists(fullpath):
                                filectime = file_modified_iso8601(fullpath)
                                filesize = os.path.getsize(fullpath)

                                content['assets'][suffix] = {
                                    'href': './{}.{}'.format(id_, suffix),
                                    'created': filectime,
                                    'file:size': filesize
                                }

            except fiona.errors.DriverError:
                LOGGER.debug('Could not detect raster or vector data')

    return content