from io import BytesIO

import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm


def test_compression_roundtrip(compression):
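    # Round-trip a small frame for every compression fixture and confirm the
    # file on disk is actually compressed.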
    df = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode("utf8")
        tm.assert_frame_equal(df, pd.read_json(result))


def test_read_zipped_json(datapath):
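    # The zipped copy of the stored JSON file should parse to the same frame
    # as the uncompressed original.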
    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
    uncompressed_df = pd.read_json(uncompressed_path)

    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
    compressed_df = pd.read_json(compressed_path, compression="zip")

    tm.assert_frame_equal(uncompressed_df, compressed_df)


@td.skip_if_not_us_locale
def test_with_s3_url(compression, s3_resource, s3so):
    # Bucket "pandas-test" created in tests/io/conftest.py

    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, "rb") as f:
            s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f)

    roundtripped_df = pd.read_json(
        "s3://pandas-test/test-1", compression=compression, storage_options=s3so
    )
    tm.assert_frame_equal(df, roundtripped_df)


def test_lines_with_compression(compression):
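    # Line-delimited JSON (orient="records", lines=True) should round-trip
    # under every compression fixture.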
    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        df.to_json(path, orient="records", lines=True, compression=compression)
        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
        tm.assert_frame_equal(df, roundtripped_df)


def test_chunksize_with_compression(compression):
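    # Concatenating chunksize=1 pieces read from a compressed file should
    # match reading the file in one go.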
    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
        df.to_json(path, orient="records", lines=True, compression=compression)

        with pd.read_json(
            path, lines=True, chunksize=1, compression=compression
        ) as res:
            roundtripped_df = pd.concat(res)
        tm.assert_frame_equal(df, roundtripped_df)


def test_write_unsupported_compression_type():
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported")


def test_read_unsupported_compression_type():
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            pd.read_json(path, compression="unsupported")


@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(compression_only, read_infer, to_infer):
    # see gh-15008
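    # compression="infer" should be resolved from the file extension on both
    # the write and the read side.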
    compression = compression_only

    if compression == "zip":
        pytest.skip(f"{compression} is not supported for to_json")

    # We'll complete file extension subsequently.
    filename = "test."

    if compression == "gzip":
        filename += "gz"
    else:
        # xz --> .xz
        # bz2 --> .bz2
        filename += compression

    df = pd.DataFrame({"A": [1]})

    to_compression = "infer" if to_infer else compression
    read_compression = "infer" if read_infer else compression

    with tm.ensure_clean(filename) as path:
        df.to_json(path, compression=to_compression)
        result = pd.read_json(path, compression=read_compression)
        tm.assert_frame_equal(result, df)


def test_to_json_compression_mode(compression):
    # GH 39985 (read_json does not support user-provided binary files)
    expected = pd.DataFrame({"A": [1]})

    with BytesIO() as buffer:
        expected.to_json(buffer, compression=compression)
        # df = pd.read_json(buffer, compression=compression)
        # tm.assert_frame_equal(expected, df)