import sys
from textwrap import dedent

import numpy as np
import pandas as pd
import pytest

# NOTE(review): ``numpy.core`` is presumably a deprecated namespace on
# NumPy >= 2.0 (``np.char.add`` looks like the public equivalent) -- confirm
# against the supported NumPy versions before changing this import.
from numpy.core import defchararray

import xarray as xr
from xarray.core import formatting

from . import requires_netCDF4

# NOTE(review): the multi-space column alignment inside the expected repr
# strings below must match the formatter output exactly -- confirm by running
# this module after any reformatting of the file.


class TestFormatting:
    """Tests for the helpers in ``xarray.core.formatting``."""

    def test_get_indexer_at_least_n_items(self) -> None:
        """Check the start/end indexers computed for a range of shapes."""
        # Each case is (shape, expected indexer from start, expected from end).
        cases = [
            ((20,), (slice(10),), (slice(-10, None),)),
            ((3, 20), (0, slice(10)), (-1, slice(-10, None))),
            ((2, 10), (0, slice(10)), (-1, slice(-10, None))),
            ((2, 5), (slice(2), slice(None)), (slice(-2, None), slice(None))),
            ((1, 2, 5), (0, slice(2), slice(None)), (-1, slice(-2, None), slice(None))),
            ((2, 3, 5), (0, slice(2), slice(None)), (-1, slice(-2, None), slice(None))),
            (
                (1, 10, 1),
                (0, slice(10), slice(None)),
                (-1, slice(-10, None), slice(None)),
            ),
            (
                (2, 5, 1),
                (slice(2), slice(None), slice(None)),
                (slice(-2, None), slice(None), slice(None)),
            ),
            ((2, 5, 3), (0, slice(4), slice(None)), (-1, slice(-4, None), slice(None))),
            (
                (2, 3, 3),
                (slice(2), slice(None), slice(None)),
                (slice(-2, None), slice(None), slice(None)),
            ),
        ]
        for shape, start_expected, end_expected in cases:
            actual = formatting._get_indexer_at_least_n_items(shape, 10, from_end=False)
            assert start_expected == actual
            actual = formatting._get_indexer_at_least_n_items(shape, 10, from_end=True)
            assert end_expected == actual

    def test_first_n_items(self) -> None:
        """``first_n_items`` matches ``array.flat[:n]`` and rejects ``n == 0``."""
        array = np.arange(100).reshape(10, 5, 2)
        for n in [3, 10, 13, 100, 200]:
            actual = formatting.first_n_items(array, n)
            expected = array.flat[:n]
            assert (expected == actual).all()

        with pytest.raises(ValueError, match=r"at least one item"):
            formatting.first_n_items(array, 0)

    def test_last_n_items(self) -> None:
        """``last_n_items`` matches ``array.flat[-n:]`` for several ``n``."""
        array = np.arange(100).reshape(10, 5, 2)
        for n in [3, 10, 13, 100, 200]:
            actual = formatting.last_n_items(array, n)
            expected = array.flat[-n:]
            assert (expected == actual).all()

        # NOTE(review): this exercises ``first_n_items`` rather than
        # ``last_n_items`` -- confirm whether that is intentional before
        # changing it.
        with pytest.raises(ValueError, match=r"at least one item"):
            formatting.first_n_items(array, 0)

    def test_last_item(self) -> None:
        """``last_item`` returns the final element whatever the array shape."""
        array = np.arange(100)

        reshape = ((10, 10), (1, 100), (2, 2, 5, 5))
        expected = np.array([99])

        for r in reshape:
            result = formatting.last_item(array.reshape(r))
            assert result == expected

    def test_format_item(self) -> None:
        """Check ``format_item`` output for timestamps, timedeltas and scalars."""
        cases = [
            (pd.Timestamp("2000-01-01T12"), "2000-01-01T12:00:00"),
            (pd.Timestamp("2000-01-01"), "2000-01-01"),
            (pd.Timestamp("NaT"), "NaT"),
            (pd.Timedelta("10 days 1 hour"), "10 days 01:00:00"),
            (pd.Timedelta("-3 days"), "-3 days +00:00:00"),
            (pd.Timedelta("3 hours"), "0 days 03:00:00"),
            (pd.Timedelta("NaT"), "NaT"),
            ("foo", "'foo'"),
            (b"foo", "b'foo'"),
            (1, "1"),
            (1.0, "1.0"),
            (np.float16(1.1234), "1.123"),
            (np.float32(1.0111111), "1.011"),
            (np.float64(22.222222), "22.22"),
        ]
        for item, expected in cases:
            actual = formatting.format_item(item)
            assert expected == actual

    def test_format_items(self) -> None:
        """Check the space-joined ``format_items`` output for several inputs."""
        cases = [
            (np.arange(4) * np.timedelta64(1, "D"), "0 days 1 days 2 days 3 days"),
            (
                np.arange(4) * np.timedelta64(3, "h"),
                "00:00:00 03:00:00 06:00:00 09:00:00",
            ),
            (
                np.arange(4) * np.timedelta64(500, "ms"),
                "00:00:00 00:00:00.500000 00:00:01 00:00:01.500000",
            ),
            (pd.to_timedelta(["NaT", "0s", "1s", "NaT"]), "NaT 00:00:00 00:00:01 NaT"),
            (
                pd.to_timedelta(["1 day 1 hour", "1 day", "0 hours"]),
                "1 days 01:00:00 1 days 00:00:00 0 days 00:00:00",
            ),
            ([1, 2, 3], "1 2 3"),
        ]
        for item, expected in cases:
            actual = " ".join(formatting.format_items(item))
            assert expected == actual

    def test_format_array_flat(self) -> None:
        """Check ``format_array_flat`` output across a range of width limits."""
        actual = formatting.format_array_flat(np.arange(100), 2)
        expected = "..."
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(100), 9)
        expected = "0 ... 99"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(100), 10)
        expected = "0 1 ... 99"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(100), 13)
        expected = "0 1 ... 98 99"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(100), 15)
        expected = "0 1 2 ... 98 99"
        assert expected == actual

        # NB: Probably not ideal; an alternative would be cutting after the
        # first ellipsis
        actual = formatting.format_array_flat(np.arange(100.0), 11)
        expected = "0.0 ... ..."
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(100.0), 12)
        expected = "0.0 ... 99.0"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(3), 5)
        expected = "0 1 2"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(4.0), 11)
        expected = "0.0 ... 3.0"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(0), 0)
        expected = ""
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(1), 1)
        expected = "0"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(2), 3)
        expected = "0 1"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(4), 7)
        expected = "0 1 2 3"
        assert expected == actual

        actual = formatting.format_array_flat(np.arange(5), 7)
        expected = "0 ... 4"
        assert expected == actual

        long_str = [" ".join(["hello world" for _ in range(100)])]
        actual = formatting.format_array_flat(np.asarray([long_str]), 21)
        expected = "'hello world hello..."
        assert expected == actual

    def test_pretty_print(self) -> None:
        """Check ``pretty_print`` on an over-long string and a non-ASCII one."""
        assert formatting.pretty_print("abcdefghij", 8) == "abcde..."
        assert formatting.pretty_print("ß", 1) == "ß"

    def test_maybe_truncate(self) -> None:
        """``maybe_truncate`` leaves a short non-ASCII string unchanged."""
        assert formatting.maybe_truncate("ß", 10) == "ß"

    def test_format_timestamp_out_of_bounds(self) -> None:
        """``format_timestamp`` formats ``datetime`` objects in 1300 and 2300."""
        from datetime import datetime

        date = datetime(1300, 12, 1)
        expected = "1300-12-01"
        result = formatting.format_timestamp(date)
        assert result == expected

        date = datetime(2300, 12, 1)
        expected = "2300-12-01"
        result = formatting.format_timestamp(date)
        assert result == expected

    def test_attribute_repr(self) -> None:
        """Check ``summarize_attr`` for short, long and whitespace-only values."""
        short = formatting.summarize_attr("key", "Short string")
        long = formatting.summarize_attr("key", 100 * "Very long string ")
        newlines = formatting.summarize_attr("key", "\n\n\n")
        tabs = formatting.summarize_attr("key", "\t\t\t")
        assert short == "    key: Short string"
        assert len(long) <= 80
        assert long.endswith("...")
        assert "\n" not in newlines
        assert "\t" not in tabs

    def test_diff_array_repr(self) -> None:
        """Check ``diff_array_repr`` for a DataArray pair and a Variable pair."""
        da_a = xr.DataArray(
            np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"),
            dims=("x", "y"),
            coords={
                "x": np.array(["a", "b"], dtype="U1"),
                "y": np.array([1, 2, 3], dtype="int64"),
            },
            attrs={"units": "m", "description": "desc"},
        )

        da_b = xr.DataArray(
            np.array([1, 2], dtype="int64"),
            dims="x",
            coords={
                "x": np.array(["a", "c"], dtype="U1"),
                "label": ("x", np.array([1, 2], dtype="int64")),
            },
            attrs={"units": "kg"},
        )

        # The unicode dtype string carries a platform-dependent byte-order mark.
        byteorder = "<" if sys.byteorder == "little" else ">"
        expected = dedent(
            """\
            Left and right DataArray objects are not identical
            Differing dimensions:
                (x: 2, y: 3) != (x: 2)
            Differing values:
            L
                array([[1, 2, 3],
                       [4, 5, 6]], dtype=int64)
            R
                array([1, 2], dtype=int64)
            Differing coordinates:
            L * x        (x) %cU1 'a' 'b'
            R * x        (x) %cU1 'a' 'c'
            Coordinates only on the left object:
              * y        (y) int64 1 2 3
            Coordinates only on the right object:
                label    (x) int64 1 2
            Differing attributes:
            L   units: m
            R   units: kg
            Attributes only on the left object:
                description: desc"""
            % (byteorder, byteorder)
        )

        actual = formatting.diff_array_repr(da_a, da_b, "identical")
        try:
            assert actual == expected
        except AssertionError:
            # depending on platform, dtype may not be shown in numpy array repr
            assert actual == expected.replace(", dtype=int64", "")

        va = xr.Variable(
            "x", np.array([1, 2, 3], dtype="int64"), {"title": "test Variable"}
        )
        vb = xr.Variable(("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"))

        expected = dedent(
            """\
            Left and right Variable objects are not equal
            Differing dimensions:
                (x: 3) != (x: 2, y: 3)
            Differing values:
            L
                array([1, 2, 3], dtype=int64)
            R
                array([[1, 2, 3],
                       [4, 5, 6]], dtype=int64)"""
        )

        actual = formatting.diff_array_repr(va, vb, "equals")
        try:
            assert actual == expected
        except AssertionError:
            # Same platform-dependent dtype fallback as for the DataArray case.
            assert actual == expected.replace(", dtype=int64", "")

    @pytest.mark.filterwarnings("error")
    def test_diff_attrs_repr_with_array(self) -> None:
        """Check ``diff_attrs_repr`` with array-valued attributes.

        Warnings are escalated to errors by the ``filterwarnings`` marker, so
        any warning emitted while comparing the attributes fails the test.
        """
        attrs_a = {"attr": np.array([0, 1])}

        attrs_b = {"attr": 1}
        expected = dedent(
            """\
            Differing attributes:
            L   attr: [0 1]
            R   attr: 1
            """
        ).strip()
        actual = formatting.diff_attrs_repr(attrs_a, attrs_b, "equals")
        assert expected == actual

        attrs_c = {"attr": np.array([-3, 5])}
        expected = dedent(
            """\
            Differing attributes:
            L   attr: [0 1]
            R   attr: [-3  5]
            """
        ).strip()
        actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals")
        assert expected == actual

        # should not raise a warning
        attrs_c = {"attr": np.array([0, 1, 2])}
        expected = dedent(
            """\
            Differing attributes:
            L   attr: [0 1]
            R   attr: [0 1 2]
            """
        ).strip()
        actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals")
        assert expected == actual

    def test_diff_dataset_repr(self) -> None:
        """Check the full ``diff_dataset_repr`` text for two differing Datasets."""
        ds_a = xr.Dataset(
            data_vars={
                "var1": (("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64")),
                "var2": ("x", np.array([3, 4], dtype="int64")),
            },
            coords={
                "x": np.array(["a", "b"], dtype="U1"),
                "y": np.array([1, 2, 3], dtype="int64"),
            },
            attrs={"units": "m", "description": "desc"},
        )

        ds_b = xr.Dataset(
            data_vars={"var1": ("x", np.array([1, 2], dtype="int64"))},
            coords={
                "x": ("x", np.array(["a", "c"], dtype="U1"), {"source": 0}),
                "label": ("x", np.array([1, 2], dtype="int64")),
            },
            attrs={"units": "kg"},
        )

        # The unicode dtype string carries a platform-dependent byte-order mark.
        byteorder = "<" if sys.byteorder == "little" else ">"
        expected = dedent(
            """\
            Left and right Dataset objects are not identical
            Differing dimensions:
                (x: 2, y: 3) != (x: 2)
            Differing coordinates:
            L * x        (x) %cU1 'a' 'b'
            R * x        (x) %cU1 'a' 'c'
                source: 0
            Coordinates only on the left object:
              * y        (y) int64 1 2 3
            Coordinates only on the right object:
                label    (x) int64 1 2
            Differing data variables:
            L   var1     (x, y) int64 1 2 3 4 5 6
            R   var1     (x) int64 1 2
            Data variables only on the left object:
                var2     (x) int64 3 4
            Differing attributes:
            L   units: m
            R   units: kg
            Attributes only on the left object:
                description: desc"""
            % (byteorder, byteorder)
        )

        actual = formatting.diff_dataset_repr(ds_a, ds_b, "identical")
        assert actual == expected

    def test_array_repr(self) -> None:
        """Check ``array_repr`` for a tuple-named DataArray, expanded and inline."""
        ds = xr.Dataset(coords={"foo": [1, 2, 3], "bar": [1, 2, 3]})
        ds[(1, 2)] = xr.DataArray([0], dims="test")
        actual = formatting.array_repr(ds[(1, 2)])
        expected = dedent(
            """\
            <xarray.DataArray (1, 2) (test: 1)>
            array([0])
            Dimensions without coordinates: test"""
        )

        assert actual == expected

        with xr.set_options(display_expand_data=False):
            actual = formatting.array_repr(ds[(1, 2)])
            expected = dedent(
                """\
                <xarray.DataArray (1, 2) (test: 1)>
                0
                Dimensions without coordinates: test"""
            )

            assert actual == expected

    def test_array_repr_variable(self) -> None:
        """Smoke test: ``array_repr`` accepts a ``Variable`` without raising."""
        var = xr.Variable("x", [0, 1])

        formatting.array_repr(var)

        with xr.set_options(display_expand_data=False):
            formatting.array_repr(var)


def test_inline_variable_array_repr_custom_repr() -> None:
    """``inline_variable_array_repr`` returns the wrapped ``_repr_inline_``."""

    class CustomArray:
        """Minimal array-like wrapper exposing a custom ``_repr_inline_``."""

        def __init__(self, value, attr):
            self.value = value
            self.attr = attr

        def _repr_inline_(self, width):
            formatted = f"({self.attr}) {self.value}"
            if len(formatted) > width:
                formatted = f"({self.attr}) ..."

            return formatted

        def __array_function__(self, *args, **kwargs):
            return NotImplemented

        @property
        def shape(self):
            return self.value.shape

        @property
        def dtype(self):
            return self.value.dtype

        @property
        def ndim(self):
            return self.value.ndim

    value = CustomArray(np.array([20, 40]), "m")
    variable = xr.Variable("x", value)

    max_width = 10
    # Pass the same width to both sides so the two cannot drift apart.
    actual = formatting.inline_variable_array_repr(variable, max_width=max_width)

    assert actual == value._repr_inline_(max_width)


def test_set_numpy_options() -> None:
    """``set_numpy_options`` applies print options and restores them on exit."""
    original_options = np.get_printoptions()
    with formatting.set_numpy_options(threshold=10):
        assert len(repr(np.arange(500))) < 200
    # original options are restored
    assert np.get_printoptions() == original_options


def test_short_numpy_repr() -> None:
    """``short_numpy_repr`` stays under 30 lines for arrays of several ranks."""
    cases = [
        np.random.randn(500),
        np.random.randn(20, 20),
        np.random.randn(5, 10, 15),
        np.random.randn(5, 10, 15, 3),
        np.random.randn(100, 5, 1),
    ]
    # number of lines:
    # for default numpy repr: 167, 140, 254, 248, 599
    # for short_numpy_repr: 1, 7, 24, 19, 25
    for array in cases:
        num_lines = formatting.short_numpy_repr(array).count("\n") + 1
        assert num_lines < 30


def test_large_array_repr_length() -> None:
    """The repr of a (100, 5, 1) DataArray stays under 50 lines."""
    da = xr.DataArray(np.random.randn(100, 5, 1))

    result = repr(da).splitlines()
    assert len(result) < 50


@requires_netCDF4
def test_repr_file_collapsed(tmp_path) -> None:
    """Check ``array_repr`` of a file-backed array with data expansion off."""
    arr = xr.DataArray(np.arange(300), dims="test")
    arr.to_netcdf(tmp_path / "test.nc", engine="netcdf4")

    with xr.open_dataarray(tmp_path / "test.nc") as arr, xr.set_options(
        display_expand_data=False
    ):
        actual = formatting.array_repr(arr)
        expected = dedent(
            """\
            <xarray.DataArray (test: 300)>
            array([  0,   1,   2, ..., 297, 298, 299])
            Dimensions without coordinates: test"""
        )

        assert actual == expected


@pytest.mark.parametrize(
    "display_max_rows, n_vars, n_attr",
    [(50, 40, 30), (35, 40, 30), (11, 40, 30), (1, 40, 30)],
)
def test__mapping_repr(display_max_rows, n_vars, n_attr) -> None:
    """Check row limiting and collapsed sections in the Dataset repr.

    Builds a Dataset with ``n_vars`` data variables (each on its own
    coordinate) and ``n_attr`` attributes, then checks the number of rows
    printed under ``display_max_rows`` and the text produced when all
    ``display_expand_*`` options are disabled.
    """
    long_name = "long_name"
    a = defchararray.add(long_name, np.arange(0, n_vars).astype(str))
    b = defchararray.add("attr_", np.arange(0, n_attr).astype(str))
    c = defchararray.add("coord", np.arange(0, n_vars).astype(str))
    attrs = {k: 2 for k in b}
    coords = {_c: np.array([0, 1]) for _c in c}
    # One data variable per coordinate, paired up in order.
    data_vars = {
        var_name: xr.DataArray(
            name=var_name,
            data=np.array([3, 4]),
            dims=[coord_name],
            coords={coord_name: coord_values},
        )
        for var_name, (coord_name, coord_values) in zip(a, coords.items())
    }
    ds = xr.Dataset(data_vars)
    ds.attrs = attrs

    with xr.set_options(display_max_rows=display_max_rows):
        # Parse the data_vars print and show only data_vars rows:
        summary = formatting.dataset_repr(ds).split("\n")
        summary = [v for v in summary if long_name in v]
        # The length should be less than or equal to display_max_rows:
        assert len(summary) <= display_max_rows

        summary = formatting.data_vars_repr(ds.data_vars).split("\n")
        summary = [v for v in summary if long_name in v]
        # The length should be equal to the number of data variables
        assert len(summary) == n_vars

        summary = formatting.coords_repr(ds.coords).split("\n")
        summary = [v for v in summary if "coord" in v]
        # The length should be equal to the number of data variables
        assert len(summary) == n_vars

    with xr.set_options(
        display_expand_coords=False,
        display_expand_data_vars=False,
        display_expand_attrs=False,
    ):
        actual = formatting.dataset_repr(ds)
        coord_s = ", ".join(f"{c}: {len(v)}" for c, v in coords.items())
        expected = dedent(
            f"""\
            <xarray.Dataset>
            Dimensions:      ({coord_s})
            Coordinates: ({n_vars})
            Data variables: ({n_vars})
            Attributes: ({n_attr})"""
        )
        assert actual == expected