import numpy as np
import pytest

import xarray as xr
from xarray.core import dtypes, merge
from xarray.core.merge import MergeError
from xarray.testing import assert_equal, assert_identical

from .test_dataset import create_test_data


class TestMergeInternals:
    """tests for helpers in ``xarray.core.merge``"""

    def test_broadcast_dimension_size(self):
        """check the computed dimension sizes and the error on a size mismatch"""
        actual = merge.broadcast_dimension_size(
            [xr.Variable("x", [1]), xr.Variable("y", [2, 1])]
        )
        assert actual == {"x": 1, "y": 2}

        actual = merge.broadcast_dimension_size(
            [xr.Variable(("x", "y"), [[1, 2]]), xr.Variable("y", [2, 1])]
        )
        assert actual == {"x": 1, "y": 2}

        # "y" has length 2 in the first variable but length 1 in the second
        with pytest.raises(ValueError):
            merge.broadcast_dimension_size(
                [xr.Variable(("x", "y"), [[1, 2]]), xr.Variable("y", [2])]
            )


class TestMergeFunction:
    """tests for the top-level ``xr.merge`` function"""

    def test_merge_arrays(self):
        """merging two named arrays gives the dataset holding both variables"""
        data = create_test_data(add_attrs=False)

        actual = xr.merge([data.var1, data.var2])
        expected = data[["var1", "var2"]]
        assert_identical(actual, expected)

    def test_merge_datasets(self):
        """merge single-variable datasets, and a dataset with itself"""
        data = create_test_data(add_attrs=False)

        actual = xr.merge([data[["var1"]], data[["var2"]]])
        expected = data[["var1", "var2"]]
        assert_identical(actual, expected)

        actual = xr.merge([data, data])
        assert_identical(actual, data)

    def test_merge_dataarray_unnamed(self):
        """an unnamed DataArray is rejected with a ValueError"""
        data = xr.DataArray([1, 2], dims="x")
        with pytest.raises(ValueError, match=r"without providing an explicit name"):
            xr.merge([data])

    def test_merge_arrays_attrs_default(self):
        """check the attrs produced when ``combine_attrs`` is not passed"""
        var1_attrs = {"a": 1, "b": 2}
        var2_attrs = {"a": 1, "c": 3}
        # same content as var1_attrs: the merged dataset is expected to carry
        # the attrs of the first array
        expected_attrs = {"a": 1, "b": 2}

        data = create_test_data(add_attrs=False)
        expected = data[["var1", "var2"]].copy()
        expected.var1.attrs = var1_attrs
        expected.var2.attrs = var2_attrs
        expected.attrs = expected_attrs

        data.var1.attrs = var1_attrs
        data.var2.attrs = var2_attrs
        actual = xr.merge([data.var1, data.var2])
        assert_identical(actual, expected)

    @pytest.mark.parametrize(
        "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception",
        [
            (
                "no_conflicts",
                {"a": 1, "b": 2},
                {"a": 1, "c": 3},
                {"a": 1, "b": 2, "c": 3},
                False,
            ),
            ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False),
            ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False),
            (
                "no_conflicts",
                {"a": 1, "b": 2},
                {"a": 4, "c": 3},
                {"a": 1, "b": 2, "c": 3},
                True,
            ),
            ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False),
            ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False),
            ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True),
            (
                "override",
                {"a": 1, "b": 2},
                {"a": 4, "b": 5, "c": 3},
                {"a": 1, "b": 2},
                False,
            ),
            (
                "drop_conflicts",
                {"a": 1, "b": 2, "c": 3},
                {"b": 1, "c": 3, "d": 4},
                {"a": 1, "c": 3, "d": 4},
                False,
            ),
            (
                "drop_conflicts",
                {"a": 1, "b": np.array([2]), "c": np.array([3])},
                {"b": 1, "c": np.array([3]), "d": 4},
                {"a": 1, "c": np.array([3]), "d": 4},
                False,
            ),
            (
                lambda attrs, context: attrs[1],
                {"a": 1, "b": 2, "c": 3},
                {"a": 4, "b": 3, "c": 1},
                {"a": 4, "b": 3, "c": 1},
                False,
            ),
        ],
    )
    def test_merge_arrays_attrs(
        self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception
    ):
        """check that combine_attrs is used on the dataset-level attrs"""
        data1 = xr.Dataset(attrs=var1_attrs)
        data2 = xr.Dataset(attrs=var2_attrs)
        if expect_exception:
            # the result is never used: only the raised error matters here
            with pytest.raises(MergeError, match="combine_attrs"):
                xr.merge([data1, data2], combine_attrs=combine_attrs)
        else:
            actual = xr.merge([data1, data2], combine_attrs=combine_attrs)
            expected = xr.Dataset(attrs=expected_attrs)

            assert_identical(actual, expected)

    @pytest.mark.parametrize(
        "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception",
        [
            (
                "no_conflicts",
                {"a": 1, "b": 2},
                {"a": 1, "c": 3},
                {"a": 1, "b": 2, "c": 3},
                False,
            ),
            ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False),
            ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False),
            (
                "no_conflicts",
                {"a": 1, "b": 2},
                {"a": 4, "c": 3},
                {"a": 1, "b": 2, "c": 3},
                True,
            ),
            ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False),
            ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False),
            ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True),
            (
                "override",
                {"a": 1, "b": 2},
                {"a": 4, "b": 5, "c": 3},
                {"a": 1, "b": 2},
                False,
            ),
            (
                "drop_conflicts",
                {"a": 1, "b": 2, "c": 3},
                {"b": 1, "c": 3, "d": 4},
                {"a": 1, "c": 3, "d": 4},
                False,
            ),
            (
                lambda attrs, context: attrs[1],
                {"a": 1, "b": 2, "c": 3},
                {"a": 4, "b": 3, "c": 1},
                {"a": 4, "b": 3, "c": 1},
                False,
            ),
        ],
    )
    def test_merge_arrays_attrs_variables(
        self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception
    ):
        """check that combine_attrs is used on data variables and coords"""
        data1 = xr.Dataset(
            {"var1": ("dim1", [], attrs1)}, coords={"dim1": ("dim1", [], attrs1)}
        )
        data2 = xr.Dataset(
            {"var1": ("dim1", [], attrs2)}, coords={"dim1": ("dim1", [], attrs2)}
        )

        if expect_exception:
            # the result is never used: only the raised error matters here
            with pytest.raises(MergeError, match="combine_attrs"):
                xr.merge([data1, data2], combine_attrs=combine_attrs)
        else:
            actual = xr.merge([data1, data2], combine_attrs=combine_attrs)
            expected = xr.Dataset(
                {"var1": ("dim1", [], expected_attrs)},
                coords={"dim1": ("dim1", [], expected_attrs)},
            )

            assert_identical(actual, expected)

    def test_merge_attrs_override_copy(self):
        """mutating the merged attrs must not change the attrs of the input"""
        ds1 = xr.Dataset(attrs={"x": 0})
        ds2 = xr.Dataset(attrs={"x": 1})
        ds3 = xr.merge([ds1, ds2], combine_attrs="override")
        ds3.attrs["x"] = 2
        # read the attribute through ``attrs`` so the check does not depend on
        # attribute-style lookup on the dataset
        assert ds1.attrs["x"] == 0

    def test_merge_attrs_drop_conflicts(self):
        """with three inputs, only attrs without conflicting values survive"""
        ds1 = xr.Dataset(attrs={"a": 0, "b": 0, "c": 0})
        ds2 = xr.Dataset(attrs={"b": 0, "c": 1, "d": 0})
        ds3 = xr.Dataset(attrs={"a": 0, "b": 1, "c": 0, "e": 0})

        actual = xr.merge([ds1, ds2, ds3], combine_attrs="drop_conflicts")
        expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
        assert_identical(actual, expected)

    def test_merge_attrs_no_conflicts_compat_minimal(self):
        """make sure compat="minimal" does not silence errors"""
        ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})
        ds2 = xr.Dataset({"a": ("x", [], {"a": 1})})

        with pytest.raises(xr.MergeError, match="combine_attrs"):
            xr.merge([ds1, ds2], combine_attrs="no_conflicts", compat="minimal")

    def test_merge_dicts_simple(self):
        """plain dicts of scalars are accepted as objects to merge"""
        actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}])
        expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5})
        assert_identical(actual, expected)

    def test_merge_dicts_dims(self):
        """dicts holding a dimensioned variable and its index can be merged"""
        actual = xr.merge([{"y": ("x", [13])}, {"x": [12]}])
        expected = xr.Dataset({"x": [12], "y": ("x", [13])})
        assert_identical(actual, expected)

    def test_merge_error(self):
        """conflicting values for the same variable raise MergeError"""
        ds = xr.Dataset({"x": 0})
        with pytest.raises(xr.MergeError):
            xr.merge([ds, ds + 1])

    def test_merge_alignment_error(self):
        """join="exact" raises a ValueError when the indexes differ"""
        ds = xr.Dataset(coords={"x": [1, 2]})
        other = xr.Dataset(coords={"x": [2, 3]})
        with pytest.raises(ValueError, match=r"indexes .* not equal"):
            xr.merge([ds, other], join="exact")

    def test_merge_wrong_input_error(self):
        """inputs of unsupported types raise a TypeError"""
        with pytest.raises(TypeError, match=r"objects must be an iterable"):
            xr.merge([1])
        ds = xr.Dataset(coords={"x": [1, 2]})
        with pytest.raises(TypeError, match=r"objects must be an iterable"):
            xr.merge({"a": ds})
        with pytest.raises(TypeError, match=r"objects must be an iterable"):
            xr.merge([ds, 1])

    def test_merge_no_conflicts_single_var(self):
        """check compat="no_conflicts" for one variable under each join"""
        ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = xr.Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
        expected = xr.Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
        assert expected.identical(xr.merge([ds1, ds2], compat="no_conflicts"))
        assert expected.identical(xr.merge([ds2, ds1], compat="no_conflicts"))
        assert ds1.identical(xr.merge([ds1, ds2], compat="no_conflicts", join="left"))
        assert ds2.identical(xr.merge([ds1, ds2], compat="no_conflicts", join="right"))
        expected = xr.Dataset({"a": ("x", [2]), "x": [1]})
        assert expected.identical(
            xr.merge([ds1, ds2], compat="no_conflicts", join="inner")
        )

        # build each fixture outside the ``raises`` block so that only the
        # merge call itself is expected to raise
        ds3 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]})
        with pytest.raises(xr.MergeError):
            xr.merge([ds1, ds3], compat="no_conflicts")

        ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]})
        with pytest.raises(xr.MergeError):
            xr.merge([ds1, ds3], compat="no_conflicts")

    def test_merge_no_conflicts_multi_var(self):
        """check compat="no_conflicts" with several, partially NaN, variables"""
        data = create_test_data(add_attrs=False)
        data1 = data.copy(deep=True)
        data2 = data.copy(deep=True)

        expected = data[["var1", "var2"]]
        actual = xr.merge([data1.var1, data2.var2], compat="no_conflicts")
        assert_identical(expected, actual)

        # blank out complementary parts of each copy; merging them is expected
        # to compare equal to the untouched data
        data1["var1"][:, :5] = np.nan
        data2["var1"][:, 5:] = np.nan
        data1["var2"][:4, :] = np.nan
        data2["var2"][4:, :] = np.nan
        del data2["var3"]

        actual = xr.merge([data1, data2], compat="no_conflicts")
        assert_equal(data, actual)

    def test_merge_no_conflicts_preserve_attrs(self):
        """variable attrs survive merging a dataset with itself"""
        data = xr.Dataset({"x": ([], 0, {"foo": "bar"})})
        actual = xr.merge([data, data], combine_attrs="no_conflicts")
        assert_identical(data, actual)

    def test_merge_no_conflicts_broadcast(self):
        """a NaN counterpart does not conflict, whichever input holds it"""
        datasets = [xr.Dataset({"x": ("y", [0])}), xr.Dataset({"x": np.nan})]
        actual = xr.merge(datasets)
        expected = xr.Dataset({"x": ("y", [0])})
        assert_identical(expected, actual)

        datasets = [xr.Dataset({"x": ("y", [np.nan])}), xr.Dataset({"x": 0})]
        actual = xr.merge(datasets)
        assert_identical(expected, actual)


class TestMergeMethod:
    """tests for the ``Dataset.merge`` method"""

    def test_merge(self):
        """check basic merges, and the errors for clashing variables"""
        data = create_test_data()
        ds1 = data[["var1"]]
        ds2 = data[["var3"]]
        expected = data[["var1", "var3"]]
        actual = ds1.merge(ds2)
        assert_identical(expected, actual)

        actual = ds2.merge(ds1)
        assert_identical(expected, actual)

        actual = data.merge(data)
        assert_identical(data, actual)
        actual = data.reset_coords(drop=True).merge(data)
        assert_identical(data, actual)
        actual = data.merge(data.reset_coords(drop=True))
        assert_identical(data, actual)

        with pytest.raises(ValueError):
            ds1.merge(ds2.rename({"var3": "var1"}))
        with pytest.raises(ValueError, match=r"should be coordinates or not"):
            data.reset_coords().merge(data)
        with pytest.raises(ValueError, match=r"should be coordinates or not"):
            data.merge(data.reset_coords())

    def test_merge_broadcast_equals(self):
        """a scalar merges with an array holding that same value"""
        ds1 = xr.Dataset({"x": 0})
        ds2 = xr.Dataset({"x": ("y", [0, 0])})
        actual = ds1.merge(ds2)
        assert_identical(ds2, actual)

        actual = ds2.merge(ds1)
        assert_identical(ds2, actual)

        actual = ds1.copy()
        actual.update(ds2)
        assert_identical(ds2, actual)

        ds1 = xr.Dataset({"x": np.nan})
        ds2 = xr.Dataset({"x": ("y", [np.nan, np.nan])})
        actual = ds1.merge(ds2)
        assert_identical(ds2, actual)

    def test_merge_compat(self):
        """check the errors raised, or not, for each ``compat`` option"""
        ds1 = xr.Dataset({"x": 0})
        ds2 = xr.Dataset({"x": 1})
        for compat in ["broadcast_equals", "equals", "identical", "no_conflicts"]:
            with pytest.raises(xr.MergeError):
                ds1.merge(ds2, compat=compat)

        ds2 = xr.Dataset({"x": [0, 0]})
        for compat in ["equals", "identical"]:
            with pytest.raises(ValueError, match=r"should be coordinates or not"):
                ds1.merge(ds2, compat=compat)

        # same value as ds1 but with extra attrs on the variable
        ds2 = xr.Dataset({"x": ((), 0, {"foo": "bar"})})
        with pytest.raises(xr.MergeError):
            ds1.merge(ds2, compat="identical")

        with pytest.raises(ValueError, match=r"compat=.* invalid"):
            ds1.merge(ds2, compat="foobar")

        assert ds1.identical(ds1.merge(ds2, compat="override"))

    def test_merge_auto_align(self):
        """datasets with different indexes are aligned per ``join``"""
        ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]})
        expected = xr.Dataset(
            {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]}
        )
        assert expected.identical(ds1.merge(ds2))
        assert expected.identical(ds2.merge(ds1))

        expected = expected.isel(x=slice(2))
        assert expected.identical(ds1.merge(ds2, join="left"))
        assert expected.identical(ds2.merge(ds1, join="right"))

        expected = expected.isel(x=slice(1, 2))
        assert expected.identical(ds1.merge(ds2, join="inner"))
        assert expected.identical(ds2.merge(ds1, join="inner"))

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"a": 2, "b": 1}])
    def test_merge_fill_value(self, fill_value):
        """check that ``fill_value`` fills the entries added by alignment"""
        ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]})
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_a = fill_value_b = np.nan
        elif isinstance(fill_value, dict):
            fill_value_a = fill_value["a"]
            fill_value_b = fill_value["b"]
        else:
            fill_value_a = fill_value_b = fill_value

        expected = xr.Dataset(
            {"a": ("x", [1, 2, fill_value_a]), "b": ("x", [fill_value_b, 3, 4])},
            {"x": [0, 1, 2]},
        )
        assert expected.identical(ds1.merge(ds2, fill_value=fill_value))
        assert expected.identical(ds2.merge(ds1, fill_value=fill_value))
        assert expected.identical(xr.merge([ds1, ds2], fill_value=fill_value))

    def test_merge_no_conflicts(self):
        """check compat="no_conflicts" on the method under each join"""
        ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = xr.Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
        expected = xr.Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})

        assert expected.identical(ds1.merge(ds2, compat="no_conflicts"))
        assert expected.identical(ds2.merge(ds1, compat="no_conflicts"))

        assert ds1.identical(ds1.merge(ds2, compat="no_conflicts", join="left"))

        assert ds2.identical(ds1.merge(ds2, compat="no_conflicts", join="right"))

        expected2 = xr.Dataset({"a": ("x", [2]), "x": [1]})
        assert expected2.identical(ds1.merge(ds2, compat="no_conflicts", join="inner"))

        # build each fixture outside the ``raises`` block so that only the
        # merge call itself is expected to raise
        ds3 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]})
        with pytest.raises(xr.MergeError):
            ds1.merge(ds3, compat="no_conflicts")

        ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]})
        with pytest.raises(xr.MergeError):
            ds1.merge(ds3, compat="no_conflicts")

    def test_merge_dataarray(self):
        """merging a named DataArray via the method matches ``xr.merge``"""
        ds = xr.Dataset({"a": 0})
        da = xr.DataArray(data=1, name="b")

        assert_identical(ds.merge(da), xr.merge([ds, da]))

    @pytest.mark.parametrize(
        ["combine_attrs", "attrs1", "attrs2", "expected_attrs", "expect_error"],
        # don't need to test thoroughly
        (
            ("drop", {"a": 0, "b": 1, "c": 2}, {"a": 1, "b": 2, "c": 3}, {}, False),
            (
                "drop_conflicts",
                {"a": 0, "b": 1, "c": 2},
                {"b": 2, "c": 2, "d": 3},
                {"a": 0, "c": 2, "d": 3},
                False,
            ),
            ("override", {"a": 0, "b": 1}, {"a": 1, "b": 2}, {"a": 0, "b": 1}, False),
            ("no_conflicts", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True),
            ("identical", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True),
        ),
    )
    def test_merge_combine_attrs(
        self, combine_attrs, attrs1, attrs2, expected_attrs, expect_error
    ):
        """check that the method forwards ``combine_attrs`` for dataset attrs"""
        ds1 = xr.Dataset(attrs=attrs1)
        ds2 = xr.Dataset(attrs=attrs2)

        if expect_error:
            with pytest.raises(xr.MergeError):
                ds1.merge(ds2, combine_attrs=combine_attrs)
        else:
            actual = ds1.merge(ds2, combine_attrs=combine_attrs)
            expected = xr.Dataset(attrs=expected_attrs)
            assert_identical(actual, expected)