"""Tests for reading and writing JSON with compression."""
from io import (
    BytesIO,
    StringIO,
)

import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm


def test_compression_roundtrip(compression):
    """Write a frame with ``compression`` and read it back unchanged."""
    df = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode("utf8")
        # Wrap the decoded text in StringIO: passing a literal JSON string
        # to read_json is deprecated in newer pandas versions.
        tm.assert_frame_equal(df, pd.read_json(StringIO(result)))


def test_read_zipped_json(datapath):
    """Reading the zipped data file gives the same frame as the plain one."""
    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
    uncompressed_df = pd.read_json(uncompressed_path)

    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
    compressed_df = pd.read_json(compressed_path, compression="zip")

    tm.assert_frame_equal(uncompressed_df, compressed_df)


@td.skip_if_not_us_locale
def test_with_s3_url(compression, s3_resource, s3so):
    """Upload a compressed JSON file to S3 and read it back via an s3:// URL."""
    # Bucket "pandas-test" created in tests/io/conftest.py

    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, "rb") as f:
            s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f)

    roundtripped_df = pd.read_json(
        "s3://pandas-test/test-1", compression=compression, storage_options=s3so
    )
    tm.assert_frame_equal(df, roundtripped_df)


def test_lines_with_compression(compression):
    """Round-trip a frame through compressed line-delimited JSON."""
    with tm.ensure_clean() as path:
        df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
        df.to_json(path, orient="records", lines=True, compression=compression)
        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
        tm.assert_frame_equal(df, roundtripped_df)


def test_chunksize_with_compression(compression):
    """Read compressed line-delimited JSON in chunks of one row and re-join."""
    with tm.ensure_clean() as path:
        df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
        df.to_json(path, orient="records", lines=True, compression=compression)

        with pd.read_json(
            path, lines=True, chunksize=1, compression=compression
        ) as res:
            roundtripped_df = pd.concat(res)
        tm.assert_frame_equal(df, roundtripped_df)


def test_write_unsupported_compression_type():
    """to_json raises ValueError for an unrecognized compression name."""
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported")


def test_read_unsupported_compression_type():
    """read_json raises ValueError for an unrecognized compression name."""
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            pd.read_json(path, compression="unsupported")


@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(compression_only, read_infer, to_infer):
    """Round-trip with every mix of explicit and inferred compression.

    ``to_infer`` / ``read_infer`` select whether the write / read call is
    given ``"infer"`` or the explicit compression name.
    """
    # see gh-15008
    compression = compression_only

    if compression == "zip":
        # NOTE(review): this skip may be stale; confirm whether zip output is
        # still unsupported by to_json before removing it.
        pytest.skip(f"{compression} is not supported for to_json")

    # gzip --> .gz; for the others the extension equals the compression
    # name (xz --> .xz, bz2 --> .bz2).
    extension = "gz" if compression == "gzip" else compression
    filename = "test." + extension

    df = pd.DataFrame({"A": [1]})

    to_compression = "infer" if to_infer else compression
    read_compression = "infer" if read_infer else compression

    with tm.ensure_clean(filename) as path:
        df.to_json(path, compression=to_compression)
        result = pd.read_json(path, compression=read_compression)
        tm.assert_frame_equal(result, df)


def test_to_json_compression_mode(compression):
    """to_json can write compressed output to a user-provided binary buffer."""
    # GH 39985 (read_json does not support user-provided binary files)
    expected = pd.DataFrame({"A": [1]})

    with BytesIO() as buffer:
        # Only the write is exercised: the buffer cannot be read back with
        # read_json (see GH 39985 above), so there is no round-trip assertion.
        expected.to_json(buffer, compression=compression)