1# =================================================================
2#
3# Authors: Tom Kralidis <tomkralidis@gmail.com>
4#
5# Copyright (c) 2020 Tom Kralidis
6#
7# Permission is hereby granted, free of charge, to any person
8# obtaining a copy of this software and associated documentation
9# files (the "Software"), to deal in the Software without
10# restriction, including without limitation the rights to use,
11# copy, modify, merge, publish, distribute, sublicense, and/or sell
12# copies of the Software, and to permit persons to whom the
13# Software is furnished to do so, subject to the following
14# conditions:
15#
16# The above copyright notice and this permission notice shall be
17# included in all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26# OTHER DEALINGS IN THE SOFTWARE.
27#
28# =================================================================
29
30from datetime import datetime
31import io
32from json import loads
33import logging
34import os
35from urllib.parse import urljoin
36
37from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError,
38                                    ProviderNotFoundError)
39from pygeoapi.util import file_modified_iso8601, get_path_basename
40
41LOGGER = logging.getLogger(__name__)
42
43
class FileSystemProvider(BaseProvider):
    """filesystem Provider"""

    def __init__(self, provider_def):
        """
        Initialize object

        :param provider_def: provider definition

        :returns: pygeoapi.provider.filesystem.FileSystemProvider

        :raises ProviderConnectionError: if the configured data path
                                         does not exist
        """

        super().__init__(provider_def)

        if not os.path.exists(self.data):
            msg = 'Directory does not exist: {}'.format(self.data)
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

    def get_data_path(self, baseurl, urlpath, dirpath):
        """
        Gets directory listing or file description or raw file dump

        :param baseurl: base URL of endpoint
        :param urlpath: base path of URL
        :param dirpath: directory basepath (equivalent of URL), appended
                        verbatim to the provider's data path

        :returns: `dict` of file listing or `dict` of GeoJSON item or raw file
                  (`bytes`)

        :raises ProviderNotFoundError: if the requested path does not
                                       resolve to a servable resource
                                       inside the provider's data path
        """

        # NOTE(review): os.path.join is used to build URLs here and below;
        # on a platform whose path separator is not '/' the resulting links
        # would differ - confirm whether that matters for deployments
        thispath = os.path.join(baseurl, urlpath)

        resource_type = None
        root_link = None
        child_links = []

        # plain concatenation (not os.path.join): os.path.join would discard
        # self.data whenever dirpath starts with a path separator
        data_path = self.data + dirpath

        # dirpath is derived from the request; refuse anything that, once
        # '..' segments are collapsed, points outside the data directory
        data_root = os.path.abspath(self.data)
        resolved_path = os.path.abspath(data_path)
        if os.path.commonpath([data_root, resolved_path]) != data_root:
            msg = 'Resource does not exist: {}'.format(data_path)
            LOGGER.error(msg)
            raise ProviderNotFoundError(msg)

        if '/' not in dirpath:  # root
            root_link = baseurl
        else:
            parentpath = urljoin(thispath, '.')
            child_links.append({
                'rel': 'parent',
                'href': '{}?f=json'.format(parentpath),
                'type': 'application/json'
            })
            child_links.append({
                'rel': 'parent',
                'href': parentpath,
                'type': 'text/html'
            })

            # climb one level per '/' in dirpath to get back to the root
            depth = dirpath.count('/')
            root_link = urljoin(thispath, '../' * depth)

        content = {
            'links': [{
                'rel': 'root',
                'href': '{}?f=json'.format(root_link),
                'type': 'application/json'
                }, {
                'rel': 'root',
                'href': root_link,
                'type': 'text/html'
                }, {
                'rel': 'self',
                'href': '{}?f=json'.format(thispath),
                'type': 'application/json',
                }, {
                'rel': 'self',
                'href': thispath,
                'type': 'text/html'
                }
            ]
        }

        LOGGER.debug('Checking if path exists as raw file or directory')
        if (data_path.endswith(tuple(self.file_types)) and
                os.path.isfile(data_path)):
            # explicit extension in the request: dump the file itself
            resource_type = 'raw_file'
        elif os.path.isdir(data_path):
            resource_type = 'directory'
        else:
            LOGGER.debug('Checking if path exists as file via file_types')
            for ft in self.file_types:
                tmp_path = '{}{}'.format(data_path, ft)
                if os.path.isfile(tmp_path):
                    resource_type = 'file'
                    data_path = tmp_path
                    break

        if resource_type is None:
            msg = 'Resource does not exist: {}'.format(data_path)
            LOGGER.error(msg)
            raise ProviderNotFoundError(msg)

        if resource_type == 'raw_file':
            with io.open(data_path, 'rb') as fh:
                return fh.read()

        elif resource_type == 'directory':
            content['type'] = 'Catalog'
            for dc in sorted(os.listdir(data_path)):
                # TODO: handle a generic directory for tiles
                if dc == "tiles":
                    continue

                fullpath = os.path.join(data_path, dc)

                # file metadata is only read for entries that are actually
                # listed, so e.g. a dangling symlink cannot abort the listing
                if os.path.isdir(fullpath):
                    child_links.append({
                        'rel': 'child',
                        'href': os.path.join(baseurl, urlpath, dc),
                        'type': 'text/html',
                        'created': file_modified_iso8601(fullpath),
                    })
                elif os.path.isfile(fullpath):
                    basename, extension = os.path.splitext(dc)
                    if extension not in self.file_types:
                        continue

                    # items are linked without their extension; the title
                    # is derived from the path including the extension
                    newpath = os.path.join(baseurl, urlpath, basename)
                    newpath2 = '{}{}'.format(newpath, extension)
                    child_links.append({
                        'rel': 'item',
                        'href': newpath,
                        'title': get_path_basename(newpath2),
                        'created': file_modified_iso8601(fullpath),
                        'file:size': os.path.getsize(fullpath)
                    })

        elif resource_type == 'file':
            filename = os.path.basename(data_path)

            id_, extension = os.path.splitext(filename)
            if urlpath:
                # urlpath already ends with the item name, so only the
                # extension has to be appended to form the asset URL
                filename = extension
            url = '{}/{}{}'.format(baseurl, urlpath, filename)

            filectime = file_modified_iso8601(data_path)
            filesize = os.path.getsize(data_path)

            # a file description replaces the catalog skeleton built above;
            # only the parent links (child_links) are carried over below
            content = {
                'id': id_,
                'type': 'Feature',
                'properties': {},
                'links': [],
                'assets': {}
            }

            content.update(_describe_file(data_path))

            content['assets']['default'] = {
                'href': url,
                'created': filectime,
                'file:size': filesize
            }

        content['links'].extend(child_links)

        return content

    def __repr__(self):
        return '<FileSystemProvider> {}'.format(self.data)
227
228
def _bounds_to_polygon(bnds):
    """
    Helper function to build a GeoJSON Polygon from a bounding box

    :param bnds: sequence of (minx, miny, maxx, maxy)

    :returns: `dict` of GeoJSON Polygon geometry (closed ring)
    """

    return {
        'type': 'Polygon',
        'coordinates': [[
            [bnds[0], bnds[1]],
            [bnds[0], bnds[3]],
            [bnds[2], bnds[3]],
            [bnds[2], bnds[1]],
            [bnds[0], bnds[1]]
        ]]
    }


def _describe_file(filepath):
    """
    Helper function to describe a geospatial data
    First checks if a sidecar mcf file is available, if so uses that
    if not, script will parse the file to retrieve some info from the file

    The file is probed as raster data (rasterio) first and, if rasterio
    cannot open it, as vector data (fiona).  If either library is not
    installed, the description gathered so far is returned unchanged.

    :param filepath: path to file

    :returns: `dict` of GeoJSON item (always has `bbox`, `geometry` and
              `properties` keys; `assets` is added for ESRI Shapefiles)
    """

    # function-scope import, in line with the other lazy imports below
    from datetime import timezone

    content = {
        'bbox': None,
        'geometry': None,
        'properties': {}
    }

    mcf_file = '{}.yml'.format(os.path.splitext(filepath)[0])

    if os.path.isfile(mcf_file):
        try:
            from pygeometa.core import read_mcf, MCFReadError
            from pygeometa.schemas.stac import STACItemOutputSchema

            md = read_mcf(mcf_file)
            stacjson = STACItemOutputSchema.write(STACItemOutputSchema, md)
            content.update(loads(stacjson))
        except ImportError:
            LOGGER.debug('pygeometa not found')
        except MCFReadError as err:
            LOGGER.warning('MCF error: {}'.format(err))
    else:
        LOGGER.debug('No mcf found at: {}'.format(mcf_file))

    # the sidecar metadata already provided a spatial extent
    if content['geometry'] is not None or content['bbox'] is not None:
        return content

    try:
        import rasterio
        from rasterio.crs import CRS
        from rasterio.warp import transform_bounds
    except ImportError as err:
        LOGGER.warning('rasterio not found')
        LOGGER.warning(err)
        return content

    try:
        import fiona
    except ImportError as err:
        LOGGER.warning('fiona not found')
        LOGGER.warning(err)
        return content

    try:  # raster
        LOGGER.debug('Testing raster data detection')
        # context manager guarantees the dataset handle is released
        with rasterio.open(filepath) as d:
            scrs = CRS(d.crs)
            if scrs.to_epsg() not in [None, 4326]:
                tcrs = CRS.from_epsg(4326)
                bnds = transform_bounds(scrs, tcrs,
                                        d.bounds[0], d.bounds[1],
                                        d.bounds[2], d.bounds[3])
                content['properties']['projection'] = scrs.to_epsg()
            else:
                bnds = [d.bounds.left, d.bounds.bottom,
                        d.bounds.right, d.bounds.top]
            content['bbox'] = bnds
            content['geometry'] = _bounds_to_polygon(bnds)

            for k, v in d.tags(d.count).items():
                content['properties'][k] = v
                if k == 'GRIB_REF_TIME':
                    try:
                        value = int(v.split()[0])
                        # the trailing 'Z' denotes UTC, so the epoch value
                        # must be converted in UTC, not in local time
                        datetime_ = datetime.fromtimestamp(
                            value, tz=timezone.utc).replace(tzinfo=None)
                    except (IndexError, ValueError,
                            OverflowError, OSError) as err:
                        LOGGER.warning(
                            'Invalid GRIB_REF_TIME: {}'.format(err))
                        continue
                    content['properties']['datetime'] = datetime_.isoformat() + 'Z'  # noqa
    except rasterio.errors.RasterioIOError:
        try:
            LOGGER.debug('Testing vector data detection')
            # context manager guarantees the collection is closed
            with fiona.open(filepath) as d:
                scrs = CRS(d.crs)
                if scrs.to_epsg() not in [None, 4326]:
                    tcrs = CRS.from_epsg(4326)
                    bnds = transform_bounds(scrs, tcrs,
                                            d.bounds[0], d.bounds[1],
                                            d.bounds[2], d.bounds[3])
                    content['properties']['projection'] = scrs.to_epsg()
                else:
                    bnds = d.bounds

                # only report an extent when the schema declares a geometry
                if d.schema['geometry'] not in [None, 'None']:
                    content['bbox'] = [
                        bnds[0],
                        bnds[1],
                        bnds[2],
                        bnds[3]
                    ]
                    content['geometry'] = _bounds_to_polygon(bnds)

                for k, v in d.schema['properties'].items():
                    content['properties'][k] = v

                if d.driver == 'ESRI Shapefile':
                    # expose the shapefile's companion files as assets
                    stem = os.path.splitext(filepath)[0]
                    id_ = os.path.basename(stem)
                    content['assets'] = {}
                    for suffix in ['shx', 'dbf', 'prj']:
                        fullpath = '{}.{}'.format(stem, suffix)

                        if os.path.exists(fullpath):
                            filectime = file_modified_iso8601(fullpath)
                            filesize = os.path.getsize(fullpath)

                            content['assets'][suffix] = {
                                'href': './{}.{}'.format(id_, suffix),
                                'created': filectime,
                                'file:size': filesize
                            }

        except fiona.errors.DriverError:
            LOGGER.debug('Could not detect raster or vector data')

    return content
368