1import numpy as np 2import pytest 3from scipy import sparse 4from scipy.sparse import random as sparse_random 5from sklearn.utils._testing import assert_array_almost_equal 6 7from numpy.testing import assert_allclose, assert_array_equal 8from scipy.interpolate import BSpline 9from sklearn.linear_model import LinearRegression 10from sklearn.pipeline import Pipeline 11from sklearn.preprocessing import ( 12 KBinsDiscretizer, 13 PolynomialFeatures, 14 SplineTransformer, 15) 16from sklearn.utils.fixes import linspace, sp_version, parse_version 17 18 19@pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer)) 20def test_polynomial_and_spline_array_order(est): 21 """Test that output array has the given order.""" 22 X = np.arange(10).reshape(5, 2) 23 24 def is_c_contiguous(a): 25 return np.isfortran(a.T) 26 27 assert is_c_contiguous(est().fit_transform(X)) 28 assert is_c_contiguous(est(order="C").fit_transform(X)) 29 assert np.isfortran(est(order="F").fit_transform(X)) 30 31 32@pytest.mark.parametrize( 33 "params, err_msg", 34 [ 35 ({"degree": -1}, "degree must be a non-negative integer"), 36 ({"degree": 2.5}, "degree must be a non-negative integer"), 37 ({"degree": "string"}, "degree must be a non-negative integer"), 38 ({"n_knots": 1}, "n_knots must be a positive integer >= 2."), 39 ({"n_knots": 1}, "n_knots must be a positive integer >= 2."), 40 ({"n_knots": 2.5}, "n_knots must be a positive integer >= 2."), 41 ({"n_knots": "string"}, "n_knots must be a positive integer >= 2."), 42 ({"knots": 1}, "Expected 2D array, got scalar array instead:"), 43 ({"knots": [1, 2]}, "Expected 2D array, got 1D array instead:"), 44 ( 45 {"knots": [[1]]}, 46 r"Number of knots, knots.shape\[0\], must be >= 2.", 47 ), 48 ( 49 {"knots": [[1, 5], [2, 6]]}, 50 r"knots.shape\[1\] == n_features is violated.", 51 ), 52 ( 53 {"knots": [[1], [1], [2]]}, 54 "knots must be sorted without duplicates.", 55 ), 56 ({"knots": [[2], [1]]}, "knots must be sorted without duplicates."), 57 ( 58 {"extrapolation": None}, 59 "extrapolation must be one of 'error', 'constant', 'linear', " 60 "'continue' or 'periodic'.", 61 ), 62 ( 63 {"extrapolation": 1}, 64 "extrapolation must be one of 'error', 'constant', 'linear', " 65 "'continue' or 'periodic'.", 66 ), 67 ( 68 {"extrapolation": "string"}, 69 "extrapolation must be one of 'error', 'constant', 'linear', " 70 "'continue' or 'periodic'.", 71 ), 72 ({"include_bias": None}, "include_bias must be bool."), 73 ({"include_bias": 1}, "include_bias must be bool."), 74 ({"include_bias": "string"}, "include_bias must be bool."), 75 ( 76 {"extrapolation": "periodic", "n_knots": 3, "degree": 3}, 77 "Periodic splines require degree < n_knots. Got n_knots=3 and degree=3.", 78 ), 79 ( 80 {"extrapolation": "periodic", "knots": [[0], [1]], "degree": 2}, 81 "Periodic splines require degree < n_knots. Got n_knots=2 and degree=2.", 82 ), 83 ], 84) 85def test_spline_transformer_input_validation(params, err_msg): 86 """Test that we raise errors for invalid input in SplineTransformer.""" 87 X = [[1], [2]] 88 89 with pytest.raises(ValueError, match=err_msg): 90 SplineTransformer(**params).fit(X) 91 92 93def test_spline_transformer_manual_knot_input(): 94 """ 95 Test that array-like knot positions in SplineTransformer are accepted. 96 """ 97 X = np.arange(20).reshape(10, 2) 98 knots = [[0.5, 1], [1.5, 2], [5, 10]] 99 st1 = SplineTransformer(degree=3, knots=knots, n_knots=None).fit(X) 100 knots = np.asarray(knots) 101 st2 = SplineTransformer(degree=3, knots=knots, n_knots=None).fit(X) 102 for i in range(X.shape[1]): 103 assert_allclose(st1.bsplines_[i].t, st2.bsplines_[i].t) 104 105 106@pytest.mark.parametrize("extrapolation", ["continue", "periodic"]) 107def test_spline_transformer_integer_knots(extrapolation): 108 """Test that SplineTransformer accepts integer value knot positions.""" 109 X = np.arange(20).reshape(10, 2) 110 knots = [[0, 1], [1, 2], [5, 5], [11, 10], [12, 11]] 111 _ = SplineTransformer( 112 degree=3, knots=knots, extrapolation=extrapolation 113 ).fit_transform(X) 114 115 116# TODO: Remove in 1.2 when get_feature_names is removed. 117@pytest.mark.filterwarnings("ignore::FutureWarning:sklearn") 118@pytest.mark.parametrize("get_names", ["get_feature_names", "get_feature_names_out"]) 119def test_spline_transformer_feature_names(get_names): 120 """Test that SplineTransformer generates correct features name.""" 121 X = np.arange(20).reshape(10, 2) 122 splt = SplineTransformer(n_knots=3, degree=3, include_bias=True).fit(X) 123 feature_names = getattr(splt, get_names)() 124 assert_array_equal( 125 feature_names, 126 [ 127 "x0_sp_0", 128 "x0_sp_1", 129 "x0_sp_2", 130 "x0_sp_3", 131 "x0_sp_4", 132 "x1_sp_0", 133 "x1_sp_1", 134 "x1_sp_2", 135 "x1_sp_3", 136 "x1_sp_4", 137 ], 138 ) 139 140 splt = SplineTransformer(n_knots=3, degree=3, include_bias=False).fit(X) 141 feature_names = getattr(splt, get_names)(["a", "b"]) 142 assert_array_equal( 143 feature_names, 144 [ 145 "a_sp_0", 146 "a_sp_1", 147 "a_sp_2", 148 "a_sp_3", 149 "b_sp_0", 150 "b_sp_1", 151 "b_sp_2", 152 "b_sp_3", 153 ], 154 ) 155 156 157@pytest.mark.parametrize("degree", range(1, 5)) 158@pytest.mark.parametrize("n_knots", range(3, 5)) 159@pytest.mark.parametrize("knots", ["uniform", "quantile"]) 160@pytest.mark.parametrize("extrapolation", ["constant", "periodic"]) 161def test_spline_transformer_unity_decomposition(degree, n_knots, knots, extrapolation): 162 """Test that B-splines are indeed a decomposition of unity. 163 164 Splines basis functions must sum up to 1 per row, if we stay in between 165 boundaries. 166 """ 167 X = np.linspace(0, 1, 100)[:, None] 168 # make the boundaries 0 and 1 part of X_train, for sure. 169 X_train = np.r_[[[0]], X[::2, :], [[1]]] 170 X_test = X[1::2, :] 171 172 if extrapolation == "periodic": 173 n_knots = n_knots + degree # periodic splines require degree < n_knots 174 175 splt = SplineTransformer( 176 n_knots=n_knots, 177 degree=degree, 178 knots=knots, 179 include_bias=True, 180 extrapolation=extrapolation, 181 ) 182 splt.fit(X_train) 183 for X in [X_train, X_test]: 184 assert_allclose(np.sum(splt.transform(X), axis=1), 1) 185 186 187@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) 188def test_spline_transformer_linear_regression(bias, intercept): 189 """Test that B-splines fit a sinusodial curve pretty well.""" 190 X = np.linspace(0, 10, 100)[:, None] 191 y = np.sin(X[:, 0]) + 2 # +2 to avoid the value 0 in assert_allclose 192 pipe = Pipeline( 193 steps=[ 194 ( 195 "spline", 196 SplineTransformer( 197 n_knots=15, 198 degree=3, 199 include_bias=bias, 200 extrapolation="constant", 201 ), 202 ), 203 ("ols", LinearRegression(fit_intercept=intercept)), 204 ] 205 ) 206 pipe.fit(X, y) 207 assert_allclose(pipe.predict(X), y, rtol=1e-3) 208 209 210@pytest.mark.parametrize( 211 ["knots", "n_knots", "sample_weight", "expected_knots"], 212 [ 213 ("uniform", 3, None, np.array([[0, 2], [3, 8], [6, 14]])), 214 ( 215 "uniform", 216 3, 217 np.array([0, 0, 1, 1, 0, 3, 1]), 218 np.array([[2, 2], [4, 8], [6, 14]]), 219 ), 220 ("uniform", 4, None, np.array([[0, 2], [2, 6], [4, 10], [6, 14]])), 221 ("quantile", 3, None, np.array([[0, 2], [3, 3], [6, 14]])), 222 ( 223 "quantile", 224 3, 225 np.array([0, 0, 1, 1, 0, 3, 1]), 226 np.array([[2, 2], [5, 8], [6, 14]]), 227 ), 228 ], 229) 230def test_spline_transformer_get_base_knot_positions( 231 knots, n_knots, sample_weight, expected_knots 232): 233 # Check the behaviour to find the positions of the knots with and without 234 # `sample_weight` 235 X = np.array([[0, 2], [0, 2], [2, 2], [3, 3], [4, 6], [5, 8], [6, 14]]) 236 base_knots = SplineTransformer._get_base_knot_positions( 237 X=X, knots=knots, n_knots=n_knots, sample_weight=sample_weight 238 ) 239 assert_allclose(base_knots, expected_knots) 240 241 242@pytest.mark.parametrize( 243 "knots, n_knots, degree", 244 [ 245 ("uniform", 5, 3), 246 ("uniform", 12, 8), 247 ( 248 [[-1.0, 0.0], [0, 1.0], [0.1, 2.0], [0.2, 3.0], [0.3, 4.0], [1, 5.0]], 249 None, 250 3, 251 ), 252 ], 253) 254def test_spline_transformer_periodicity_of_extrapolation(knots, n_knots, degree): 255 """Test that the SplineTransformer is periodic for multiple features.""" 256 X_1 = linspace((-1, 0), (1, 5), 10) 257 X_2 = linspace((1, 5), (3, 10), 10) 258 259 splt = SplineTransformer( 260 knots=knots, n_knots=n_knots, degree=degree, extrapolation="periodic" 261 ) 262 splt.fit(X_1) 263 264 assert_allclose(splt.transform(X_1), splt.transform(X_2)) 265 266 267@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) 268def test_spline_transformer_periodic_linear_regression(bias, intercept): 269 """Test that B-splines fit a periodic curve pretty well.""" 270 # "+ 3" to avoid the value 0 in assert_allclose 271 def f(x): 272 return np.sin(2 * np.pi * x) - np.sin(8 * np.pi * x) + 3 273 274 X = np.linspace(0, 1, 101)[:, None] 275 pipe = Pipeline( 276 steps=[ 277 ( 278 "spline", 279 SplineTransformer( 280 n_knots=20, 281 degree=3, 282 include_bias=bias, 283 extrapolation="periodic", 284 ), 285 ), 286 ("ols", LinearRegression(fit_intercept=intercept)), 287 ] 288 ) 289 pipe.fit(X, f(X[:, 0])) 290 291 # Generate larger array to check periodic extrapolation 292 X_ = np.linspace(-1, 2, 301)[:, None] 293 predictions = pipe.predict(X_) 294 assert_allclose(predictions, f(X_[:, 0]), atol=0.01, rtol=0.01) 295 assert_allclose(predictions[0:100], predictions[100:200], rtol=1e-3) 296 297 298@pytest.mark.skipif( 299 sp_version < parse_version("1.0.0"), 300 reason="Periodic extrapolation not yet implemented for BSpline.", 301) 302def test_spline_transformer_periodic_spline_backport(): 303 """Test that the backport of extrapolate="periodic" works correctly""" 304 X = np.linspace(-2, 3.5, 10)[:, None] 305 degree = 2 306 307 # Use periodic extrapolation backport in SplineTransformer 308 transformer = SplineTransformer( 309 degree=degree, extrapolation="periodic", knots=[[-1.0], [0.0], [1.0]] 310 ) 311 Xt = transformer.fit_transform(X) 312 313 # Use periodic extrapolation in BSpline 314 coef = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]]) 315 spl = BSpline(np.arange(-3, 4), coef, degree, "periodic") 316 Xspl = spl(X[:, 0]) 317 assert_allclose(Xt, Xspl) 318 319 320def test_spline_transformer_periodic_splines_periodicity(): 321 """ 322 Test if shifted knots result in the same transformation up to permutation. 323 """ 324 X = np.linspace(0, 10, 101)[:, None] 325 326 transformer_1 = SplineTransformer( 327 degree=3, 328 extrapolation="periodic", 329 knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]], 330 ) 331 332 transformer_2 = SplineTransformer( 333 degree=3, 334 extrapolation="periodic", 335 knots=[[1.0], [3.0], [4.0], [5.0], [8.0], [9.0]], 336 ) 337 338 Xt_1 = transformer_1.fit_transform(X) 339 Xt_2 = transformer_2.fit_transform(X) 340 341 assert_allclose(Xt_1, Xt_2[:, [4, 0, 1, 2, 3]]) 342 343 344@pytest.mark.parametrize("degree", [3, 5]) 345def test_spline_transformer_periodic_splines_smoothness(degree): 346 """Test that spline transformation is smooth at first / last knot.""" 347 X = np.linspace(-2, 10, 10_000)[:, None] 348 349 transformer = SplineTransformer( 350 degree=degree, 351 extrapolation="periodic", 352 knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]], 353 ) 354 Xt = transformer.fit_transform(X) 355 356 delta = (X.max() - X.min()) / len(X) 357 tol = 10 * delta 358 359 dXt = Xt 360 # We expect splines of degree `degree` to be (`degree`-1) times 361 # continuously differentiable. I.e. for d = 0, ..., `degree` - 1 the d-th 362 # derivative should be continuous. This is the case if the (d+1)-th 363 # numerical derivative is reasonably small (smaller than `tol` in absolute 364 # value). We thus compute d-th numeric derivatives for d = 1, ..., `degree` 365 # and compare them to `tol`. 366 # 367 # Note that the 0-th derivative is the function itself, such that we are 368 # also checking its continuity. 369 for d in range(1, degree + 1): 370 # Check continuity of the (d-1)-th derivative 371 diff = np.diff(dXt, axis=0) 372 assert np.abs(diff).max() < tol 373 # Compute d-th numeric derivative 374 dXt = diff / delta 375 376 # As degree `degree` splines are not `degree` times continuously 377 # differentiable at the knots, the `degree + 1`-th numeric derivative 378 # should have spikes at the knots. 379 diff = np.diff(dXt, axis=0) 380 assert np.abs(diff).max() > 1 381 382 383@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) 384@pytest.mark.parametrize("degree", [1, 2, 3, 4, 5]) 385def test_spline_transformer_extrapolation(bias, intercept, degree): 386 """Test that B-spline extrapolation works correctly.""" 387 # we use a straight line for that 388 X = np.linspace(-1, 1, 100)[:, None] 389 y = X.squeeze() 390 391 # 'constant' 392 pipe = Pipeline( 393 [ 394 [ 395 "spline", 396 SplineTransformer( 397 n_knots=4, 398 degree=degree, 399 include_bias=bias, 400 extrapolation="constant", 401 ), 402 ], 403 ["ols", LinearRegression(fit_intercept=intercept)], 404 ] 405 ) 406 pipe.fit(X, y) 407 assert_allclose(pipe.predict([[-10], [5]]), [-1, 1]) 408 409 # 'linear' 410 pipe = Pipeline( 411 [ 412 [ 413 "spline", 414 SplineTransformer( 415 n_knots=4, 416 degree=degree, 417 include_bias=bias, 418 extrapolation="linear", 419 ), 420 ], 421 ["ols", LinearRegression(fit_intercept=intercept)], 422 ] 423 ) 424 pipe.fit(X, y) 425 assert_allclose(pipe.predict([[-10], [5]]), [-10, 5]) 426 427 # 'error' 428 splt = SplineTransformer( 429 n_knots=4, degree=degree, include_bias=bias, extrapolation="error" 430 ) 431 splt.fit(X) 432 with pytest.raises(ValueError): 433 splt.transform([[-10]]) 434 with pytest.raises(ValueError): 435 splt.transform([[5]]) 436 437 438def test_spline_transformer_kbindiscretizer(): 439 """Test that a B-spline of degree=0 is equivalent to KBinsDiscretizer.""" 440 rng = np.random.RandomState(97531) 441 X = rng.randn(200).reshape(200, 1) 442 n_bins = 5 443 n_knots = n_bins + 1 444 445 splt = SplineTransformer( 446 n_knots=n_knots, degree=0, knots="quantile", include_bias=True 447 ) 448 splines = splt.fit_transform(X) 449 450 kbd = KBinsDiscretizer(n_bins=n_bins, encode="onehot-dense", strategy="quantile") 451 kbins = kbd.fit_transform(X) 452 453 # Though they should be exactly equal, we test approximately with high 454 # accuracy. 455 assert_allclose(splines, kbins, rtol=1e-13) 456 457 458@pytest.mark.parametrize("n_knots", [5, 10]) 459@pytest.mark.parametrize("include_bias", [True, False]) 460@pytest.mark.parametrize("degree", [3, 5]) 461def test_spline_transformer_n_features_out(n_knots, include_bias, degree): 462 """Test that transform results in n_features_out_ features.""" 463 splt = SplineTransformer(n_knots=n_knots, degree=degree, include_bias=include_bias) 464 X = np.linspace(0, 1, 10)[:, None] 465 splt.fit(X) 466 467 assert splt.transform(X).shape[1] == splt.n_features_out_ 468 469 470@pytest.mark.parametrize( 471 "params, err_msg", 472 [ 473 ({"degree": -1}, "degree must be a non-negative integer"), 474 ({"degree": 2.5}, "degree must be a non-negative int or tuple"), 475 ({"degree": "12"}, r"degree=\(min_degree, max_degree\) must"), 476 ({"degree": "string"}, "degree must be a non-negative int or tuple"), 477 ({"degree": (-1, 2)}, r"degree=\(min_degree, max_degree\) must"), 478 ({"degree": (0, 1.5)}, r"degree=\(min_degree, max_degree\) must"), 479 ({"degree": (3, 2)}, r"degree=\(min_degree, max_degree\) must"), 480 ], 481) 482def test_polynomial_features_input_validation(params, err_msg): 483 """Test that we raise errors for invalid input in PolynomialFeatures.""" 484 X = [[1], [2]] 485 486 with pytest.raises(ValueError, match=err_msg): 487 PolynomialFeatures(**params).fit(X) 488 489 490@pytest.fixture() 491def single_feature_degree3(): 492 X = np.arange(6)[:, np.newaxis] 493 P = np.hstack([np.ones_like(X), X, X ** 2, X ** 3]) 494 return X, P 495 496 497@pytest.mark.parametrize( 498 "degree, include_bias, interaction_only, indices", 499 [ 500 (3, True, False, slice(None, None)), 501 (3, False, False, slice(1, None)), 502 (3, True, True, [0, 1]), 503 (3, False, True, [1]), 504 ((2, 3), True, False, [0, 2, 3]), 505 ((2, 3), False, False, [2, 3]), 506 ((2, 3), True, True, [0]), 507 ((2, 3), False, True, []), 508 ], 509) 510@pytest.mark.parametrize( 511 "sparse_X", 512 [False, sparse.csr_matrix, sparse.csc_matrix], 513) 514def test_polynomial_features_one_feature( 515 single_feature_degree3, 516 degree, 517 include_bias, 518 interaction_only, 519 indices, 520 sparse_X, 521): 522 """Test PolynomialFeatures on single feature up to degree 3.""" 523 X, P = single_feature_degree3 524 if sparse_X: 525 X = sparse_X(X) 526 tf = PolynomialFeatures( 527 degree=degree, include_bias=include_bias, interaction_only=interaction_only 528 ).fit(X) 529 out = tf.transform(X) 530 if sparse_X: 531 out = out.toarray() 532 assert_allclose(out, P[:, indices]) 533 if tf.n_output_features_ > 0: 534 assert tf.powers_.shape == (tf.n_output_features_, tf.n_features_in_) 535 536 537@pytest.fixture() 538def two_features_degree3(): 539 X = np.arange(6).reshape((3, 2)) 540 x1 = X[:, :1] 541 x2 = X[:, 1:] 542 P = np.hstack( 543 [ 544 x1 ** 0 * x2 ** 0, # 0 545 x1 ** 1 * x2 ** 0, # 1 546 x1 ** 0 * x2 ** 1, # 2 547 x1 ** 2 * x2 ** 0, # 3 548 x1 ** 1 * x2 ** 1, # 4 549 x1 ** 0 * x2 ** 2, # 5 550 x1 ** 3 * x2 ** 0, # 6 551 x1 ** 2 * x2 ** 1, # 7 552 x1 ** 1 * x2 ** 2, # 8 553 x1 ** 0 * x2 ** 3, # 9 554 ] 555 ) 556 return X, P 557 558 559@pytest.mark.parametrize( 560 "degree, include_bias, interaction_only, indices", 561 [ 562 (2, True, False, slice(0, 6)), 563 (2, False, False, slice(1, 6)), 564 (2, True, True, [0, 1, 2, 4]), 565 (2, False, True, [1, 2, 4]), 566 ((2, 2), True, False, [0, 3, 4, 5]), 567 ((2, 2), False, False, [3, 4, 5]), 568 ((2, 2), True, True, [0, 4]), 569 ((2, 2), False, True, [4]), 570 (3, True, False, slice(None, None)), 571 (3, False, False, slice(1, None)), 572 (3, True, True, [0, 1, 2, 4]), 573 (3, False, True, [1, 2, 4]), 574 ((2, 3), True, False, [0, 3, 4, 5, 6, 7, 8, 9]), 575 ((2, 3), False, False, slice(3, None)), 576 ((2, 3), True, True, [0, 4]), 577 ((2, 3), False, True, [4]), 578 ((3, 3), True, False, [0, 6, 7, 8, 9]), 579 ((3, 3), False, False, [6, 7, 8, 9]), 580 ((3, 3), True, True, [0]), 581 ((3, 3), False, True, []), # would need 3 input features 582 ], 583) 584@pytest.mark.parametrize( 585 "sparse_X", 586 [False, sparse.csr_matrix, sparse.csc_matrix], 587) 588def test_polynomial_features_two_features( 589 two_features_degree3, 590 degree, 591 include_bias, 592 interaction_only, 593 indices, 594 sparse_X, 595): 596 """Test PolynomialFeatures on 2 features up to degree 3.""" 597 X, P = two_features_degree3 598 if sparse_X: 599 X = sparse_X(X) 600 tf = PolynomialFeatures( 601 degree=degree, include_bias=include_bias, interaction_only=interaction_only 602 ).fit(X) 603 out = tf.transform(X) 604 if sparse_X: 605 out = out.toarray() 606 assert_allclose(out, P[:, indices]) 607 if tf.n_output_features_ > 0: 608 assert tf.powers_.shape == (tf.n_output_features_, tf.n_features_in_) 609 610 611# TODO: Remove in 1.2 when get_feature_names is removed. 612@pytest.mark.filterwarnings("ignore::FutureWarning:sklearn") 613@pytest.mark.parametrize("get_names", ["get_feature_names", "get_feature_names_out"]) 614def test_polynomial_feature_names(get_names): 615 X = np.arange(30).reshape(10, 3) 616 poly = PolynomialFeatures(degree=2, include_bias=True).fit(X) 617 feature_names = poly.get_feature_names() 618 assert_array_equal( 619 ["1", "x0", "x1", "x2", "x0^2", "x0 x1", "x0 x2", "x1^2", "x1 x2", "x2^2"], 620 feature_names, 621 ) 622 assert len(feature_names) == poly.transform(X).shape[1] 623 624 poly = PolynomialFeatures(degree=3, include_bias=False).fit(X) 625 feature_names = getattr(poly, get_names)(["a", "b", "c"]) 626 assert_array_equal( 627 [ 628 "a", 629 "b", 630 "c", 631 "a^2", 632 "a b", 633 "a c", 634 "b^2", 635 "b c", 636 "c^2", 637 "a^3", 638 "a^2 b", 639 "a^2 c", 640 "a b^2", 641 "a b c", 642 "a c^2", 643 "b^3", 644 "b^2 c", 645 "b c^2", 646 "c^3", 647 ], 648 feature_names, 649 ) 650 assert len(feature_names) == poly.transform(X).shape[1] 651 652 poly = PolynomialFeatures(degree=(2, 3), include_bias=False).fit(X) 653 feature_names = getattr(poly, get_names)(["a", "b", "c"]) 654 assert_array_equal( 655 [ 656 "a^2", 657 "a b", 658 "a c", 659 "b^2", 660 "b c", 661 "c^2", 662 "a^3", 663 "a^2 b", 664 "a^2 c", 665 "a b^2", 666 "a b c", 667 "a c^2", 668 "b^3", 669 "b^2 c", 670 "b c^2", 671 "c^3", 672 ], 673 feature_names, 674 ) 675 assert len(feature_names) == poly.transform(X).shape[1] 676 677 poly = PolynomialFeatures( 678 degree=(3, 3), include_bias=True, interaction_only=True 679 ).fit(X) 680 feature_names = getattr(poly, get_names)(["a", "b", "c"]) 681 assert_array_equal(["1", "a b c"], feature_names) 682 assert len(feature_names) == poly.transform(X).shape[1] 683 684 # test some unicode 685 poly = PolynomialFeatures(degree=1, include_bias=True).fit(X) 686 feature_names = poly.get_feature_names(["\u0001F40D", "\u262E", "\u05D0"]) 687 assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"], feature_names) 688 689 690@pytest.mark.parametrize( 691 ["deg", "include_bias", "interaction_only", "dtype"], 692 [ 693 (1, True, False, int), 694 (2, True, False, int), 695 (2, True, False, np.float32), 696 (2, True, False, np.float64), 697 (3, False, False, np.float64), 698 (3, False, True, np.float64), 699 (4, False, False, np.float64), 700 (4, False, True, np.float64), 701 ], 702) 703def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype): 704 rng = np.random.RandomState(0) 705 X = rng.randint(0, 2, (100, 2)) 706 X_csc = sparse.csc_matrix(X) 707 708 est = PolynomialFeatures( 709 deg, include_bias=include_bias, interaction_only=interaction_only 710 ) 711 Xt_csc = est.fit_transform(X_csc.astype(dtype)) 712 Xt_dense = est.fit_transform(X.astype(dtype)) 713 714 assert isinstance(Xt_csc, sparse.csc_matrix) 715 assert Xt_csc.dtype == Xt_dense.dtype 716 assert_array_almost_equal(Xt_csc.A, Xt_dense) 717 718 719@pytest.mark.parametrize( 720 ["deg", "include_bias", "interaction_only", "dtype"], 721 [ 722 (1, True, False, int), 723 (2, True, False, int), 724 (2, True, False, np.float32), 725 (2, True, False, np.float64), 726 (3, False, False, np.float64), 727 (3, False, True, np.float64), 728 ], 729) 730def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype): 731 rng = np.random.RandomState(0) 732 X = rng.randint(0, 2, (100, 2)) 733 X_csr = sparse.csr_matrix(X) 734 735 est = PolynomialFeatures( 736 deg, include_bias=include_bias, interaction_only=interaction_only 737 ) 738 Xt_csr = est.fit_transform(X_csr.astype(dtype)) 739 Xt_dense = est.fit_transform(X.astype(dtype, copy=False)) 740 741 assert isinstance(Xt_csr, sparse.csr_matrix) 742 assert Xt_csr.dtype == Xt_dense.dtype 743 assert_array_almost_equal(Xt_csr.A, Xt_dense) 744 745 746@pytest.mark.parametrize("n_features", [1, 4, 5]) 747@pytest.mark.parametrize( 748 "min_degree, max_degree", [(0, 1), (0, 2), (1, 3), (0, 4), (3, 4)] 749) 750@pytest.mark.parametrize("interaction_only", [True, False]) 751@pytest.mark.parametrize("include_bias", [True, False]) 752def test_num_combinations( 753 n_features, 754 min_degree, 755 max_degree, 756 interaction_only, 757 include_bias, 758): 759 """ 760 Test that n_output_features_ is calculated correctly. 761 """ 762 x = sparse.csr_matrix(([1], ([0], [n_features - 1]))) 763 est = PolynomialFeatures( 764 degree=max_degree, 765 interaction_only=interaction_only, 766 include_bias=include_bias, 767 ) 768 est.fit(x) 769 num_combos = est.n_output_features_ 770 771 combos = PolynomialFeatures._combinations( 772 n_features=n_features, 773 min_degree=0, 774 max_degree=max_degree, 775 interaction_only=interaction_only, 776 include_bias=include_bias, 777 ) 778 assert num_combos == sum([1 for _ in combos]) 779 780 781@pytest.mark.parametrize( 782 ["deg", "include_bias", "interaction_only", "dtype"], 783 [ 784 (2, True, False, np.float32), 785 (2, True, False, np.float64), 786 (3, False, False, np.float64), 787 (3, False, True, np.float64), 788 ], 789) 790def test_polynomial_features_csr_X_floats(deg, include_bias, interaction_only, dtype): 791 X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr() 792 X = X_csr.toarray() 793 794 est = PolynomialFeatures( 795 deg, include_bias=include_bias, interaction_only=interaction_only 796 ) 797 Xt_csr = est.fit_transform(X_csr.astype(dtype)) 798 Xt_dense = est.fit_transform(X.astype(dtype)) 799 800 assert isinstance(Xt_csr, sparse.csr_matrix) 801 assert Xt_csr.dtype == Xt_dense.dtype 802 assert_array_almost_equal(Xt_csr.A, Xt_dense) 803 804 805@pytest.mark.parametrize( 806 ["zero_row_index", "deg", "interaction_only"], 807 [ 808 (0, 2, True), 809 (1, 2, True), 810 (2, 2, True), 811 (0, 3, True), 812 (1, 3, True), 813 (2, 3, True), 814 (0, 2, False), 815 (1, 2, False), 816 (2, 2, False), 817 (0, 3, False), 818 (1, 3, False), 819 (2, 3, False), 820 ], 821) 822def test_polynomial_features_csr_X_zero_row(zero_row_index, deg, interaction_only): 823 X_csr = sparse_random(3, 10, 1.0, random_state=0).tocsr() 824 X_csr[zero_row_index, :] = 0.0 825 X = X_csr.toarray() 826 827 est = PolynomialFeatures(deg, include_bias=False, interaction_only=interaction_only) 828 Xt_csr = est.fit_transform(X_csr) 829 Xt_dense = est.fit_transform(X) 830 831 assert isinstance(Xt_csr, sparse.csr_matrix) 832 assert Xt_csr.dtype == Xt_dense.dtype 833 assert_array_almost_equal(Xt_csr.A, Xt_dense) 834 835 836# This degree should always be one more than the highest degree supported by 837# _csr_expansion. 838@pytest.mark.parametrize( 839 ["include_bias", "interaction_only"], 840 [(True, True), (True, False), (False, True), (False, False)], 841) 842def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only): 843 X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr() 844 X = X_csr.toarray() 845 846 est = PolynomialFeatures( 847 4, include_bias=include_bias, interaction_only=interaction_only 848 ) 849 Xt_csr = est.fit_transform(X_csr) 850 Xt_dense = est.fit_transform(X) 851 852 assert isinstance(Xt_csr, sparse.csr_matrix) 853 assert Xt_csr.dtype == Xt_dense.dtype 854 assert_array_almost_equal(Xt_csr.A, Xt_dense) 855 856 857@pytest.mark.parametrize( 858 ["deg", "dim", "interaction_only"], 859 [ 860 (2, 1, True), 861 (2, 2, True), 862 (3, 1, True), 863 (3, 2, True), 864 (3, 3, True), 865 (2, 1, False), 866 (2, 2, False), 867 (3, 1, False), 868 (3, 2, False), 869 (3, 3, False), 870 ], 871) 872def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only): 873 X_csr = sparse_random(1000, dim, 0.5, random_state=0).tocsr() 874 X = X_csr.toarray() 875 876 est = PolynomialFeatures(deg, interaction_only=interaction_only) 877 Xt_csr = est.fit_transform(X_csr) 878 Xt_dense = est.fit_transform(X) 879 880 assert isinstance(Xt_csr, sparse.csr_matrix) 881 assert Xt_csr.dtype == Xt_dense.dtype 882 assert_array_almost_equal(Xt_csr.A, Xt_dense) 883 884 885def test_polynomial_features_deprecated_n_input_features(): 886 # check that we raise a deprecation warning when accessing 887 # `n_input_features_`. FIXME: remove in 1.2 888 depr_msg = ( 889 "The attribute `n_input_features_` was deprecated in version " 890 "1.0 and will be removed in 1.2." 891 ) 892 X = np.arange(10).reshape(5, 2) 893 894 with pytest.warns(FutureWarning, match=depr_msg): 895 PolynomialFeatures().fit(X).n_input_features_ 896 897 898# TODO: Remove in 1.2 when get_feature_names is removed 899@pytest.mark.parametrize("Transformer", [SplineTransformer, PolynomialFeatures]) 900def test_get_feature_names_deprecated(Transformer): 901 X = np.arange(30).reshape(10, 3) 902 poly = Transformer().fit(X) 903 msg = "get_feature_names is deprecated in 1.0" 904 with pytest.warns(FutureWarning, match=msg): 905 poly.get_feature_names() 906