1# -*- coding: utf-8 -*-
2from __future__ import absolute_import, print_function, division
3
4import logging
5import sys
6from contextlib import contextmanager
7
8from petl.compat import PY3
9from petl.io.sources import register_reader, register_writer, get_reader, get_writer
10
11logger = logging.getLogger(__name__)
12
13# region RemoteSource
14
15
class RemoteSource(object):
    """Read from or write to files stored in remote filesystems.

    Many filesystems are supported; the one to use is chosen from the
    protocol found in the `url` argument.

    Give the url to the `to...()` and `from...()` functions. E.g.::

        >>> import petl as etl
        >>>
        >>> def example_s3():
        ...     url = 's3://mybucket/prefix/to/myfilename.csv'
        ...     data = b'foo,bar\\na,1\\nb,2\\nc,2\\n'
        ...
        ...     etl.tocsv(data, url)
        ...     tbl = etl.fromcsv(url)
        ...
        >>> example_s3() # doctest: +SKIP
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'a' | '1' |
        +-----+-----+
        | 'b' | '2' |
        +-----+-----+
        | 'c' | '2' |
        +-----+-----+

    The data transfer with the remote filesystem is delegated to `fsspec`_.
    See the `Built-in Implementations <fs_builtin>`_ for the remote
    implementations that are available.

    Some filesystems can use `URL chaining <fs_chain>`_ for compound I/O.

    .. note::

        This source requires `fsspec`_ to be installed, e.g.::

            $ pip install fsspec

        Some remote filesystems require additional packages to be installed.
        See `Known Implementations <fs_known>`_ to find out which packages
        are needed, e.g.::

            $ pip install s3fs     # AWS S3
            $ pip install gcsfs    # Google Cloud Storage
            $ pip install adlfs    # Azure Blob service
            $ pip install paramiko # SFTP
            $ pip install requests # HTTP, github

    .. versionadded:: 1.6.0

    .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/
    .. _fs_builtin: https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    .. _fs_known: https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    .. _fs_chain: https://filesystem-spec.readthedocs.io/en/latest/features.html#url-chaining
    """

    def __init__(self, url, **kwargs):
        # The extra keyword arguments are kept untouched and handed over to
        # fsspec.open() each time the source is opened.
        self.kwargs = kwargs
        self.url = url

    def open_file(self, mode="rb"):
        """Return what ``fsspec.open`` builds for this url and `mode`.

        fsspec is imported here, at call time. Compression is inferred by
        fsspec and the stored keyword arguments are passed along.
        """
        import fsspec
        # auto_mkdir stays off: turning it on can fail in some filesystems, or
        # when there is no permission on the full path. E.g. s3fs tries to
        # create a bucket when writing into a folder that does not exist.
        return fsspec.open(
            self.url,
            mode=mode,
            compression='infer',
            auto_mkdir=False,
            **self.kwargs
        )

    @contextmanager
    def open(self, mode="rb"):
        """Context manager that yields the remote file opened in binary mode.

        Only the first character of `mode` is kept; ``b`` is appended to it.
        """
        binary_mode = mode[:1] + "b"  # python2
        with self.open_file(mode=binary_mode) as handle:
            yield handle
91
92
93# registering filesystems with packages installed
94
95
def _register_filesystems(only_available=False):
    """Register every fsspec implementation as a remote source.

    Two fsspec registries are walked, in this order:

    - ``known_implementations``, see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
      and
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    - ``registry``, see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.registry.register_implementation

    `only_available` is passed unchanged to `_register_filesystems_from`.
    """
    from fsspec.registry import known_implementations, registry

    for fsspec_registry in (known_implementations, registry):
        _register_filesystems_from(fsspec_registry, only_available)
104
105
def _register_filesystems_from(fsspec_registry, only_available):
    """Register each fsspec provider from this registry as remote source.

    :param fsspec_registry: mapping of protocol name to the fsspec entry for
        that protocol. Entries may be dicts (as in fsspec's
        ``known_implementations``) or filesystem classes (as in fsspec's
        ``registry``).
    :param only_available: when true, skip the entries that carry an
        ``'err'`` key.
    """
    for protocol, spec in fsspec_registry.items():
        # Only dict entries can carry the 'err' marker. fsspec's `registry`
        # maps protocols to filesystem classes, and `"err" in <class>` raises
        # TypeError, so the membership test must be limited to dicts.
        missing_deps = isinstance(spec, dict) and "err" in spec
        if missing_deps and only_available:
            # This could lead to only built-in implementations being
            # available: Other Known Implementations are reported with 'err'
            # even when the package is installed.
            continue
        # When a package is missing for fsspec, keep the source that petl
        # already has for that protocol.
        # E.g: fsspec requires the `requests` package for handling http and
        # https, but petl has URLSource that can work with urllib.
        has_reader = get_reader(protocol)
        if not missing_deps or has_reader is None:
            register_reader(protocol, RemoteSource)
        has_writer = get_writer(protocol)
        if not missing_deps or has_writer is None:
            register_writer(protocol, RemoteSource)
124
125
def _try_register_filesystems():
    """Register the fsspec filesystems when fsspec can be imported.

    A missing fsspec package is only reported at debug level. Any exception
    raised while registering is re-raised as ImportError, with the original
    exception as second argument.
    """
    try:
        # pylint: disable=unused-import
        import fsspec  # noqa: F401
    except ImportError:
        logger.debug("# Missing fsspec package. Install with: pip install fsspec")
        return

    try:
        _register_filesystems()
    except Exception as ex:
        raise ImportError("# ERROR: failed to register fsspec filesystems", ex)


# The registration is attempted when this module is imported, on Python 3 only.
if PY3:
    _try_register_filesystems()
141
142# endregion
143
144# region SMBSource
145
146
class SMBSource(object):
    """Download from or upload to Windows and Samba network drives. E.g.::

        >>> def example_smb():
        ...     import petl as etl
        ...     url = 'smb://user:password@server/share/folder/file.csv'
        ...     data = b'foo,bar\\na,1\\nb,2\\nc,2\\n'
        ...     etl.tocsv(data, url)
        ...     tbl = etl.fromcsv(url)
        ...
        >>> example_smb() # doctest: +SKIP
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'a' | '1' |
        +-----+-----+
        | 'b' | '2' |
        +-----+-----+
        | 'c' | '2' |
        +-----+-----+

    The argument `url` (str) must be a URI with the format:
    `smb://workgroup;user:password@server:port/share/folder/file.csv`.

    The host component of the URL must be a valid hostname or IP address.
    Do not use the Windows/NetBIOS machine name as the host component.

    The first component of the URL path is the name of the shared folder.
    The following path components point to the directory/folder/file.

    .. note::

        This source requires `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol[kerberos]

    .. versionadded:: 1.5.0

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements
    """

    def __init__(self, url, **kwargs):
        # The extra keyword arguments are kept untouched and handed over to
        # _open_file_smbprotocol() each time the source is opened.
        self.kwargs = kwargs
        self.url = url

    @contextmanager
    def open(self, mode="rb"):
        """Context manager that yields the SMB file opened in binary mode.

        Only the first character of `mode` is kept; ``b`` is appended to it.
        The file handle is closed when the context exits, even on error.
        """
        binary_mode = mode[:1] + "b"  # python2
        handle = _open_file_smbprotocol(
            self.url, mode=binary_mode, **self.kwargs
        )
        try:
            yield handle
        finally:
            handle.close()
201
202
def _open_file_smbprotocol(url, mode="rb", **kwargs):
    """Open the file addressed by an ``smb://`` url through `smbclient`.

    :param url: url in the format accepted by `_parse_smb_url`.
    :param mode: open mode; only its first character is kept and the file is
        always opened in binary mode.
    :param kwargs: extra keyword arguments passed to ``smbclient.open_file``.
        A ``port`` given here takes precedence over the port of the url.
    :returns: the file handle returned by ``smbclient.open_file``.
    :raises ValueError: from `_parse_smb_url`, when the url is malformed.
    :raises ConnectionError: wrapping any exception raised while registering
        the session or opening the file.
    """
    # NOTE(review): the domain parsed from the url is not forwarded to
    # smbclient - confirm whether it should be part of the username.
    _domain, host, port, user, passwd, server_path = _parse_smb_url(url)
    import smbclient

    # The port must reach open_file() as well as register_session():
    # otherwise open_file() falls back to its own default port and a
    # non-default port in the url is ignored. An explicit `port` keyword
    # argument from the caller wins over the url.
    port = kwargs.setdefault("port", port)

    try:
        # register the server with explicit credentials
        if user:
            smbclient.register_session(
                host, username=user, password=passwd, port=port
            )
        # the file is always handled as bytes
        binary_mode = mode[:1] + "b"
        return smbclient.open_file(server_path, mode=binary_mode, **kwargs)

    except Exception as ex:
        raise ConnectionError("SMB error: %s" % ex).with_traceback(sys.exc_info()[2])
221
222
223def _parse_smb_url(url):
224    e = "SMB url must be smb://workgroup;user:password@server:port/share/folder/file.txt: "
225
226    if not url:
227        raise ValueError("SMB error: no host given")
228    if not url.startswith("smb://"):
229        raise ValueError(e + url)
230
231    if PY3:
232        from urllib.parse import urlparse
233    else:
234        from urlparse import urlparse
235    parsed = urlparse(url)
236    if not parsed.path:
237        raise ValueError(e + url)
238
239    unc_path = parsed.path.replace("/", "\\")
240    server_path = "\\\\{}{}".format(parsed.hostname, unc_path)
241
242    if not parsed.username:
243        domain = None
244        username = None
245    elif ";" in parsed.username:
246        domain, username = parsed.username.split(";")
247    else:
248        domain, username = None, parsed.username
249    port = 445 if not parsed.port else int(parsed.port)
250    return domain, parsed.hostname, port, username, parsed.password, server_path
251
252
# Register SMBSource for the "smb" protocol: first as reader, then as writer.
for _register_smb in (register_reader, register_writer):
    _register_smb("smb", SMBSource)
del _register_smb
255
256# endregion
257