# -*- coding: utf-8 -*-
"""Round-trip tests for petl I/O helpers against local and remote targets.

Each test writes the mock table to a target (local folder, fsspec URL or
SMB URL) in several formats and reads it back, checking that the rows
survive unchanged.
"""
from __future__ import absolute_import, print_function, division

import sys
import os
import posixpath
from importlib import import_module

from petl.compat import PY3
from petl.test.helpers import ieq, eq_
from petl.io.avro import fromavro, toavro
from petl.io.csv import fromcsv, tocsv
from petl.io.json import fromjson, tojson
from petl.io.xlsx import fromxlsx, toxlsx
from petl.io.xls import fromxls, toxls
from petl.util.vis import look

# region Codec test cases


def test_helper_local():
    """Round-trip every supported format through the local ``./tmp`` folder.

    Only exercised on Python 3.
    """
    if PY3:
        _ensure_dir("./tmp")
        _write_read_into_url("./tmp/example.")


def test_helper_fsspec():
    """Round-trip through every ``PETL_TEST_*`` URL when fsspec is available."""
    try:
        # pylint: disable=unused-import
        import fsspec  # noqa: F401
    except ImportError as e:
        print("SKIP FSSPEC helper tests: %s" % e, file=sys.stderr)
    else:
        _write_read_from_env_matching("PETL_TEST_")


def test_helper_smb():
    """Round-trip through ``PETL_SMB_URL`` when smbclient is available."""
    try:
        # pylint: disable=unused-import
        import smbclient  # noqa: F401
    except ImportError as e:
        print("SKIP SMB helper tests: %s" % e, file=sys.stderr)
    else:
        _write_read_from_env_url("PETL_SMB_URL")


def test_helper_smb_url_parse():
    """Check that ``_parse_smb_url`` splits a full SMB URL into its parts."""
    from petl.io.remotes import _parse_smb_url

    url = r"smb://workgroup;user:password@server:444/share/folder/file.csv"
    domain, host, port, user, passwd, server_path = _parse_smb_url(url)
    # print("Parsed:", domain, host, port, user, passwd, server_path)
    eq_(domain, r"workgroup")
    eq_(host, r"server")
    eq_(port, 444)
    eq_(user, r"user")
    eq_(passwd, r"password")
    eq_(server_path, "\\\\server\\share\\folder\\file.csv")


# endregion

# region Execution


def _ensure_dir(directory):
    """Create ``directory`` (and any missing parents) if it does not exist."""
    if not os.path.exists(directory):
        os.makedirs(directory)


def _write_read_from_env_matching(prefix):
    """Run the round-trip suite for each environment variable matching ``prefix``.

    The variable name is compared case-insensitively; its value is used as
    the base URL. When no variable matches, a hint on how to define one is
    printed to stderr instead.
    """
    wanted = prefix.upper()
    matched = 0
    for variable, base_url in os.environ.items():
        if not variable.upper().startswith(wanted):
            continue
        fmsg = "\n {}: {} -> ".format(variable, base_url)
        print(fmsg, file=sys.stderr, end="")
        _write_read_into_url(base_url)
        print("DONE ", file=sys.stderr, end="")
        matched += 1
    if matched < 1:
        msg = """SKIPPED
    For testing remote source define a environment variable:
    $ export PETL_TEST_<protocol>='<protocol>://myuser:mypassword@host:port/path/to/folder'"""
        print(msg, file=sys.stderr)


def _write_read_from_env_url(env_var_name):
    """Run the round-trip suite against the URL held in ``env_var_name``.

    An unset variable (or one whose value is literally ``"skip"``) skips
    the run.
    """
    base_url = os.getenv(env_var_name, "skip")
    if base_url == "skip":
        print("SKIPPED ", file=sys.stderr, end="")
    else:
        _write_read_into_url(base_url)
        print("DONE ", file=sys.stderr, end="")


def _write_read_into_url(base_url):
    """Round-trip the mock table under ``base_url`` in every tested format."""
    _write_read_file_into_url(base_url, "filename10.csv")
    _write_read_file_into_url(base_url, "filename11.csv", "gz")
    _write_read_file_into_url(base_url, "filename12.csv", "xz")
    _write_read_file_into_url(base_url, "filename13.csv", "zst")
    _write_read_file_into_url(base_url, "filename14.csv", "lz4")
    _write_read_file_into_url(base_url, "filename15.csv", "snappy")
    _write_read_file_into_url(base_url, "filename20.json")
    _write_read_file_into_url(base_url, "filename21.json", "gz")
    _write_read_file_into_url(base_url, "filename30.avro", pkg='fastavro')
    _write_read_file_into_url(base_url, "filename40.xlsx", pkg='openpyxl')
    _write_read_file_into_url(base_url, "filename50.xls", pkg='xlwt')


def _build_source_url_from(base_url, filename, compression=None):
    """Return the target URL for ``filename`` under ``base_url``.

    Returns ``None`` (meaning "skip this case") when a compression suffix is
    requested for a local ``./`` target, or when fsspec cannot infer a codec
    from the resulting file name.
    """
    is_local = base_url.startswith("./")
    if compression is not None:
        if is_local:
            return None
        filename = filename + "." + compression
        import fsspec
        codec = fsspec.utils.infer_compression(filename)
        if codec is None:
            print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
            return None
    print("\n - %s " % filename, file=sys.stderr, end="")
    if is_local:
        # Local base is a file-name prefix ("./tmp/example."), not a folder.
        return base_url + filename
    # Remote targets are URLs: always join with "/" regardless of the host
    # OS (os.path.join would insert a backslash on Windows).
    return posixpath.join(base_url, filename)


def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
    """Write the mock table to one target file, read it back and compare.

    The writer/reader pair is chosen from the extension found in
    ``filename``. The case is skipped when ``pkg`` cannot be imported or when
    no usable URL can be built for the requested compression.
    """
    if not _is_installed(pkg, filename):
        return
    source_url = _build_source_url_from(base_url, filename, compression)
    if source_url is None:
        return
    actual = None
    if ".avro" in filename:
        toavro(_table, source_url)
        actual = fromavro(source_url)
    elif ".xlsx" in filename:  # must be tested before ".xls"
        toxlsx(_table, source_url, 'test1', mode='overwrite')
        toxlsx(_table2, source_url, 'test2', mode='add')
        actual = fromxlsx(source_url, 'test1')
    elif ".xls" in filename:
        toxls(_table, source_url, 'test')
        actual = fromxls(source_url, 'test')
    elif ".json" in filename:
        tojson(_table, source_url)
        actual = fromjson(source_url)
    elif ".csv" in filename:
        tocsv(_table, source_url, encoding="ascii", lineterminator="\n")
        actual = fromcsv(source_url, encoding="ascii")

    if actual is not None:
        _show__rows_from("Expected:", _table)
        _show__rows_from("Actual:", actual)
        ieq(_table, actual)
        ieq(_table, actual)  # verify can iterate twice
    else:
        print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")


def _show__rows_from(label, test_rows, limit=0):
    """Print ``label`` followed by a ``look`` rendering of ``test_rows``."""
    print(label)
    print(look(test_rows, limit=limit))


def _is_installed(package_name, message=None):
    """Return True when ``package_name`` can be imported (or is not required).

    On failure a SKIPPED line is written to stderr, labelled with ``message``
    when given and with the package name otherwise, followed by the reason.
    """
    if package_name is None:
        return True  # Not required
    try:
        import_module(package_name)
    except Exception as exm:  # pylint: disable=broad-except
        # import_module never returns None: a missing or broken package
        # surfaces here, so this is the only place the skip can be reported.
        # Kept broad on purpose: any import-time failure means "skip".
        msg = message or package_name
        print("\n - %s SKIPPED (%s) " % (msg, exm), file=sys.stderr, end="")
        return False
    return True


# endregion

# region Mockup data

_table = (
    (u"name", u"friends", u"age"),
    (u"Bob", "42", "33"),
    (u"Jim", "13", "69"),
    (u"Joe", "86", "17"),
    (u"Ted", "23", "51"),
)

_table2 = (
    (u"name", u"friends", u"age"),
    (u"Giannis", "31", "12"),
    (u"James", "38", "8"),
    (u"Stephen", "28", "4"),
    (u"Jason", "23", "12"),
)

# endregion