1import sys
2from textwrap import dedent
3
4import numpy as np
5import pandas as pd
6import pytest
7from numpy.core import defchararray
8
9import xarray as xr
10from xarray.core import formatting
11
12from . import requires_netCDF4
13
14
class TestFormatting:
    """Tests for the helper functions exposed by ``xarray.core.formatting``."""

    def test_get_indexer_at_least_n_items(self) -> None:
        """Compare the start/end indexers for ``n_desired=10`` with fixed expectations."""
        # Each entry: (shape, expected indexer from the start,
        # expected indexer from the end).
        scenarios = [
            ((20,), (slice(10),), (slice(-10, None),)),
            ((3, 20), (0, slice(10)), (-1, slice(-10, None))),
            ((2, 10), (0, slice(10)), (-1, slice(-10, None))),
            ((2, 5), (slice(2), slice(None)), (slice(-2, None), slice(None))),
            ((1, 2, 5), (0, slice(2), slice(None)), (-1, slice(-2, None), slice(None))),
            ((2, 3, 5), (0, slice(2), slice(None)), (-1, slice(-2, None), slice(None))),
            (
                (1, 10, 1),
                (0, slice(10), slice(None)),
                (-1, slice(-10, None), slice(None)),
            ),
            (
                (2, 5, 1),
                (slice(2), slice(None), slice(None)),
                (slice(-2, None), slice(None), slice(None)),
            ),
            ((2, 5, 3), (0, slice(4), slice(None)), (-1, slice(-4, None), slice(None))),
            (
                (2, 3, 3),
                (slice(2), slice(None), slice(None)),
                (slice(-2, None), slice(None), slice(None)),
            ),
        ]
        for shape, want_head, want_tail in scenarios:
            got = formatting._get_indexer_at_least_n_items(shape, 10, from_end=False)
            assert want_head == got
            got = formatting._get_indexer_at_least_n_items(shape, 10, from_end=True)
            assert want_tail == got

    def test_first_n_items(self) -> None:
        """``first_n_items`` agrees with flat slicing and rejects a request for 0 items."""
        data = np.arange(100).reshape(10, 5, 2)
        for count in (3, 10, 13, 100, 200):
            expected = data.flat[:count]
            actual = formatting.first_n_items(data, count)
            assert (expected == actual).all()

        with pytest.raises(ValueError, match=r"at least one item"):
            formatting.first_n_items(data, 0)

    def test_last_n_items(self) -> None:
        """``last_n_items`` agrees with slicing the tail of the flattened array."""
        data = np.arange(100).reshape(10, 5, 2)
        for count in (3, 10, 13, 100, 200):
            expected = data.flat[-count:]
            actual = formatting.last_n_items(data, count)
            assert (expected == actual).all()

        # NOTE(review): this exercises ``first_n_items`` rather than
        # ``last_n_items``; kept as-is -- confirm how ``last_n_items`` treats a
        # request for 0 items before changing it.
        with pytest.raises(ValueError, match=r"at least one item"):
            formatting.first_n_items(data, 0)

    def test_last_item(self) -> None:
        """``last_item`` yields 99 for ``arange(100)`` whatever the array shape."""
        flat = np.arange(100)
        expected = np.array([99])

        for new_shape in ((10, 10), (1, 100), (2, 2, 5, 5)):
            result = formatting.last_item(flat.reshape(new_shape))
            assert result == expected

    def test_format_item(self) -> None:
        """``format_item`` renders timestamps, timedeltas, strings and numbers."""
        # (input value, expected text)
        samples = [
            (pd.Timestamp("2000-01-01T12"), "2000-01-01T12:00:00"),
            (pd.Timestamp("2000-01-01"), "2000-01-01"),
            (pd.Timestamp("NaT"), "NaT"),
            (pd.Timedelta("10 days 1 hour"), "10 days 01:00:00"),
            (pd.Timedelta("-3 days"), "-3 days +00:00:00"),
            (pd.Timedelta("3 hours"), "0 days 03:00:00"),
            (pd.Timedelta("NaT"), "NaT"),
            ("foo", "'foo'"),
            (b"foo", "b'foo'"),
            (1, "1"),
            (1.0, "1.0"),
            (np.float16(1.1234), "1.123"),
            (np.float32(1.0111111), "1.011"),
            (np.float64(22.222222), "22.22"),
        ]
        for value, expected in samples:
            assert expected == formatting.format_item(value)

    def test_format_items(self) -> None:
        """``format_items`` renders whole sequences; results are joined with spaces."""
        # (input sequence, expected space-joined text)
        samples = [
            (np.arange(4) * np.timedelta64(1, "D"), "0 days 1 days 2 days 3 days"),
            (
                np.arange(4) * np.timedelta64(3, "h"),
                "00:00:00 03:00:00 06:00:00 09:00:00",
            ),
            (
                np.arange(4) * np.timedelta64(500, "ms"),
                "00:00:00 00:00:00.500000 00:00:01 00:00:01.500000",
            ),
            (pd.to_timedelta(["NaT", "0s", "1s", "NaT"]), "NaT 00:00:00 00:00:01 NaT"),
            (
                pd.to_timedelta(["1 day 1 hour", "1 day", "0 hours"]),
                "1 days 01:00:00 1 days 00:00:00 0 days 00:00:00",
            ),
            ([1, 2, 3], "1 2 3"),
        ]
        for values, expected in samples:
            joined = " ".join(formatting.format_items(values))
            assert expected == joined

    def test_format_array_flat(self) -> None:
        """``format_array_flat`` summarises arrays within a given maximum width."""
        # (values, max_width, expected summary)
        samples = [
            (np.arange(100), 2, "..."),
            (np.arange(100), 9, "0 ... 99"),
            (np.arange(100), 10, "0 1 ... 99"),
            (np.arange(100), 13, "0 1 ... 98 99"),
            (np.arange(100), 15, "0 1 2 ... 98 99"),
            # NB: Probably not ideal; an alternative would be cutting after the
            # first ellipsis
            (np.arange(100.0), 11, "0.0 ... ..."),
            (np.arange(100.0), 12, "0.0 ... 99.0"),
            (np.arange(3), 5, "0 1 2"),
            (np.arange(4.0), 11, "0.0 ... 3.0"),
            (np.arange(0), 0, ""),
            (np.arange(1), 1, "0"),
            (np.arange(2), 3, "0 1"),
            (np.arange(4), 7, "0 1 2 3"),
            (np.arange(5), 7, "0 ... 4"),
        ]
        for values, max_width, expected in samples:
            assert expected == formatting.format_array_flat(values, max_width)

        # A single very long string element is cut off with a trailing ellipsis.
        long_str = [" ".join(["hello world"] * 100)]
        summary = formatting.format_array_flat(np.asarray([long_str]), 21)
        assert "'hello world hello..." == summary

    def test_pretty_print(self) -> None:
        """``pretty_print`` shortens long text and leaves a one-character string alone."""
        shortened = formatting.pretty_print("abcdefghij", 8)
        assert shortened == "abcde..."

        single = formatting.pretty_print("ß", 1)
        assert single == "ß"

    def test_maybe_truncate(self) -> None:
        """``maybe_truncate`` returns text shorter than the limit unchanged."""
        result = formatting.maybe_truncate("ß", 10)
        assert result == "ß"

    def test_format_timestamp_out_of_bounds(self) -> None:
        """Datetimes in the years 1300 and 2300 are formatted as plain ISO dates."""
        from datetime import datetime

        samples = (
            (datetime(1300, 12, 1), "1300-12-01"),
            (datetime(2300, 12, 1), "2300-12-01"),
        )
        for date, expected in samples:
            result = formatting.format_timestamp(date)
            assert result == expected

    def test_attribute_repr(self) -> None:
        """``summarize_attr`` keeps short values, truncates long ones, drops newlines/tabs."""
        short_line = formatting.summarize_attr("key", "Short string")
        long_line = formatting.summarize_attr("key", 100 * "Very long string ")
        newline_line = formatting.summarize_attr("key", "\n\n\n")
        tab_line = formatting.summarize_attr("key", "\t\t\t")

        assert short_line == "    key: Short string"
        # long values must fit in 80 characters and end with an ellipsis
        assert len(long_line) <= 80
        assert long_line.endswith("...")
        assert "\n" not in newline_line
        assert "\t" not in tab_line

    def test_diff_array_repr(self) -> None:
        """``diff_array_repr`` reports differences between DataArrays and Variables."""
        left = xr.DataArray(
            np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"),
            dims=("x", "y"),
            coords={
                "x": np.array(["a", "b"], dtype="U1"),
                "y": np.array([1, 2, 3], dtype="int64"),
            },
            attrs={"units": "m", "description": "desc"},
        )

        right = xr.DataArray(
            np.array([1, 2], dtype="int64"),
            dims="x",
            coords={
                "x": np.array(["a", "c"], dtype="U1"),
                "label": ("x", np.array([1, 2], dtype="int64")),
            },
            attrs={"units": "kg"},
        )

        # The unicode dtype in the coordinate rows is prefixed with the
        # platform's byte-order character.
        endian = "<" if sys.byteorder == "little" else ">"
        expected = dedent(
            """\
        Left and right DataArray objects are not identical
        Differing dimensions:
            (x: 2, y: 3) != (x: 2)
        Differing values:
        L
            array([[1, 2, 3],
                   [4, 5, 6]], dtype=int64)
        R
            array([1, 2], dtype=int64)
        Differing coordinates:
        L * x        (x) %cU1 'a' 'b'
        R * x        (x) %cU1 'a' 'c'
        Coordinates only on the left object:
          * y        (y) int64 1 2 3
        Coordinates only on the right object:
            label    (x) int64 1 2
        Differing attributes:
        L   units: m
        R   units: kg
        Attributes only on the left object:
            description: desc"""
            % (endian, endian)
        )

        actual = formatting.diff_array_repr(left, right, "identical")
        if actual != expected:
            # depending on platform, dtype may not be shown in numpy array repr
            assert actual == expected.replace(", dtype=int64", "")

        var_1d = xr.Variable(
            "x", np.array([1, 2, 3], dtype="int64"), {"title": "test Variable"}
        )
        var_2d = xr.Variable(
            ("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64")
        )

        expected = dedent(
            """\
        Left and right Variable objects are not equal
        Differing dimensions:
            (x: 3) != (x: 2, y: 3)
        Differing values:
        L
            array([1, 2, 3], dtype=int64)
        R
            array([[1, 2, 3],
                   [4, 5, 6]], dtype=int64)"""
        )

        actual = formatting.diff_array_repr(var_1d, var_2d, "equals")
        if actual != expected:
            # same platform-dependent dtype fallback as above
            assert actual == expected.replace(", dtype=int64", "")

    @pytest.mark.filterwarnings("error")
    def test_diff_attrs_repr_with_array(self) -> None:
        """``diff_attrs_repr`` copes with array-valued attributes.

        Warnings are turned into errors by the decorator, so the test also
        fails if comparing the attributes emits a warning.
        """
        array_attrs = {"attr": np.array([0, 1])}

        # array attribute versus a scalar attribute
        scalar_attrs = {"attr": 1}
        expected = dedent(
            """\
            Differing attributes:
            L   attr: [0 1]
            R   attr: 1
            """
        ).strip()
        actual = formatting.diff_attrs_repr(array_attrs, scalar_attrs, "equals")
        assert expected == actual

        # array attribute versus another array of the same length
        same_length_attrs = {"attr": np.array([-3, 5])}
        expected = dedent(
            """\
            Differing attributes:
            L   attr: [0 1]
            R   attr: [-3  5]
            """
        ).strip()
        actual = formatting.diff_attrs_repr(array_attrs, same_length_attrs, "equals")
        assert expected == actual

        # should not raise a warning
        longer_attrs = {"attr": np.array([0, 1, 2])}
        expected = dedent(
            """\
            Differing attributes:
            L   attr: [0 1]
            R   attr: [0 1 2]
            """
        ).strip()
        actual = formatting.diff_attrs_repr(array_attrs, longer_attrs, "equals")
        assert expected == actual

    def test_diff_dataset_repr(self) -> None:
        """``diff_dataset_repr`` lists differing dims, coords, variables and attrs."""
        left = xr.Dataset(
            data_vars={
                "var1": (("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64")),
                "var2": ("x", np.array([3, 4], dtype="int64")),
            },
            coords={
                "x": np.array(["a", "b"], dtype="U1"),
                "y": np.array([1, 2, 3], dtype="int64"),
            },
            attrs={"units": "m", "description": "desc"},
        )

        right = xr.Dataset(
            data_vars={"var1": ("x", np.array([1, 2], dtype="int64"))},
            coords={
                "x": ("x", np.array(["a", "c"], dtype="U1"), {"source": 0}),
                "label": ("x", np.array([1, 2], dtype="int64")),
            },
            attrs={"units": "kg"},
        )

        # The unicode dtype in the coordinate rows is prefixed with the
        # platform's byte-order character.
        endian = "<" if sys.byteorder == "little" else ">"
        expected = dedent(
            """\
        Left and right Dataset objects are not identical
        Differing dimensions:
            (x: 2, y: 3) != (x: 2)
        Differing coordinates:
        L * x        (x) %cU1 'a' 'b'
        R * x        (x) %cU1 'a' 'c'
            source: 0
        Coordinates only on the left object:
          * y        (y) int64 1 2 3
        Coordinates only on the right object:
            label    (x) int64 1 2
        Differing data variables:
        L   var1     (x, y) int64 1 2 3 4 5 6
        R   var1     (x) int64 1 2
        Data variables only on the left object:
            var2     (x) int64 3 4
        Differing attributes:
        L   units: m
        R   units: kg
        Attributes only on the left object:
            description: desc"""
            % (endian, endian)
        )

        actual = formatting.diff_dataset_repr(left, right, "identical")
        assert actual == expected

    def test_array_repr(self) -> None:
        """``array_repr`` of a DataArray with a tuple name, expanded and collapsed."""
        name = (1, 2)
        ds = xr.Dataset(coords={"foo": [1, 2, 3], "bar": [1, 2, 3]})
        ds[name] = xr.DataArray([0], dims="test")

        expected = dedent(
            """\
        <xarray.DataArray (1, 2) (test: 1)>
        array([0])
        Dimensions without coordinates: test"""
        )
        actual = formatting.array_repr(ds[name])

        assert actual == expected

        # With data expansion switched off the values are shown inline.
        with xr.set_options(display_expand_data=False):
            expected = dedent(
                """\
            <xarray.DataArray (1, 2) (test: 1)>
            0
            Dimensions without coordinates: test"""
            )
            actual = formatting.array_repr(ds[name])

            assert actual == expected

    def test_array_repr_variable(self) -> None:
        """Smoke test: ``array_repr`` accepts a plain Variable in both display modes."""
        variable = xr.Variable("x", [0, 1])

        # No assertions: the calls only need to complete without raising.
        formatting.array_repr(variable)

        with xr.set_options(display_expand_data=False):
            formatting.array_repr(variable)
415
416
def test_inline_variable_array_repr_custom_repr() -> None:
    """The inline repr of a variable equals the wrapped object's ``_repr_inline_``."""

    class CustomArray:
        """Array-like stand-in that wraps a value and supplies its own inline repr."""

        def __init__(self, value, attr):
            self.value = value
            self.attr = attr

        def _repr_inline_(self, width):
            # Show the full text when it fits in ``width``; otherwise replace
            # the value with an ellipsis.
            text = f"({self.attr}) {self.value}"
            return text if len(text) <= width else f"({self.attr}) ..."

        def __array_function__(self, *args, **kwargs):
            return NotImplemented

        # shape, dtype and ndim are forwarded from the wrapped value.
        @property
        def shape(self):
            return self.value.shape

        @property
        def dtype(self):
            return self.value.dtype

        @property
        def ndim(self):
            return self.value.ndim

    max_width = 10
    wrapped = CustomArray(np.array([20, 40]), "m")
    variable = xr.Variable("x", wrapped)

    actual = formatting.inline_variable_array_repr(variable, max_width=max_width)

    assert actual == wrapped._repr_inline_(max_width)
452
453
def test_set_numpy_options() -> None:
    """``set_numpy_options`` changes print options only inside its ``with`` block."""
    options_before = np.get_printoptions()
    with formatting.set_numpy_options(threshold=10):
        # with threshold=10 the repr of 500 elements must stay short
        shortened = repr(np.arange(500))
        assert len(shortened) < 200
    # original options are restored
    assert options_before == np.get_printoptions()
460
461
def test_short_numpy_repr() -> None:
    """``short_numpy_repr`` stays under 30 lines for random arrays of several shapes."""
    shapes = [
        (500,),
        (20, 20),
        (5, 10, 15),
        (5, 10, 15, 3),
        (100, 5, 1),
    ]
    # number of lines:
    # for default numpy repr: 167, 140, 254, 248, 599
    # for short_numpy_repr: 1, 7, 24, 19, 25
    for shape in shapes:
        array = np.random.randn(*shape)
        line_count = len(formatting.short_numpy_repr(array).split("\n"))
        assert line_count < 30
476
477
def test_large_array_repr_length() -> None:
    """The repr of a (100, 5, 1) DataArray has fewer than 50 lines."""
    data_array = xr.DataArray(np.random.randn(100, 5, 1))

    repr_lines = repr(data_array).splitlines()
    assert len(repr_lines) < 50
484
485
@requires_netCDF4
def test_repr_file_collapsed(tmp_path) -> None:
    """Check ``array_repr`` of an array re-opened from a netCDF file.

    The array is written to ``tmp_path`` and read back with
    ``display_expand_data=False`` in effect; its repr must equal the expected
    text below.
    """
    path = tmp_path / "test.nc"
    xr.DataArray(np.arange(300), dims="test").to_netcdf(path, engine="netcdf4")

    expected = dedent(
        """\
        <xarray.DataArray (test: 300)>
        array([  0,   1,   2, ..., 297, 298, 299])
        Dimensions without coordinates: test"""
    )

    with xr.open_dataarray(path) as arr:
        with xr.set_options(display_expand_data=False):
            actual = formatting.array_repr(arr)

            assert actual == expected
503
504
@pytest.mark.parametrize(
    "display_max_rows, n_vars, n_attr",
    [(50, 40, 30), (35, 40, 30), (11, 40, 30), (1, 40, 30)],
)
def test__mapping_repr(display_max_rows, n_vars, n_attr) -> None:
    """Check row limits and collapsed sections in the dataset/mapping reprs.

    Builds a dataset with ``n_vars`` data variables (each on its own
    coordinate) and ``n_attr`` attributes, then inspects the reprs under
    ``display_max_rows`` and with all sections collapsed.
    """

    def rows_containing(text, needle):
        # Lines of ``text`` that mention ``needle``.
        return [row for row in text.split("\n") if needle in row]

    long_name = "long_name"
    var_names = defchararray.add(long_name, np.arange(0, n_vars).astype(str))
    attr_names = defchararray.add("attr_", np.arange(0, n_attr).astype(str))
    coord_names = defchararray.add("coord", np.arange(0, n_vars).astype(str))

    attrs = {attr_name: 2 for attr_name in attr_names}
    coords = {coord_name: np.array([0, 1]) for coord_name in coord_names}
    # Pair the i-th variable with the i-th coordinate.
    data_vars = {
        var_name: xr.DataArray(
            name=var_name,
            data=np.array([3, 4]),
            dims=[coord_name],
            coords={coord_name: coord_values},
        )
        for var_name, (coord_name, coord_values) in zip(var_names, coords.items())
    }
    ds = xr.Dataset(data_vars)
    ds.attrs = attrs

    with xr.set_options(display_max_rows=display_max_rows):
        # In the full dataset repr, the rows that mention a data variable
        # must not exceed display_max_rows.
        dataset_rows = rows_containing(formatting.dataset_repr(ds), long_name)
        assert len(dataset_rows) <= display_max_rows

        # The data_vars repr on its own lists every data variable.
        var_rows = rows_containing(formatting.data_vars_repr(ds.data_vars), long_name)
        assert len(var_rows) == n_vars

        # The coords repr on its own lists every coordinate (one per variable).
        coord_rows = rows_containing(formatting.coords_repr(ds.coords), "coord")
        assert len(coord_rows) == n_vars

    with xr.set_options(
        display_expand_coords=False,
        display_expand_data_vars=False,
        display_expand_attrs=False,
    ):
        # Collapsed sections show only their item counts.
        coord_s = ", ".join([f"{c}: {len(v)}" for c, v in coords.items()])
        expected = dedent(
            f"""\
            <xarray.Dataset>
            Dimensions:      ({coord_s})
            Coordinates: ({n_vars})
            Data variables: ({n_vars})
            Attributes: ({n_attr})"""
        )
        actual = formatting.dataset_repr(ds)
        assert actual == expected
565