import numpy as np
import pytest
from scipy import sparse
from scipy.sparse import random as sparse_random
from sklearn.utils._testing import assert_array_almost_equal

from numpy.testing import assert_allclose, assert_array_equal
from scipy.interpolate import BSpline
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    KBinsDiscretizer,
    PolynomialFeatures,
    SplineTransformer,
)
from sklearn.utils.fixes import linspace, sp_version, parse_version


@pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer))
def test_polynomial_and_spline_array_order(est):
    """Test that output array has the given order."""
    X = np.arange(10).reshape(5, 2)

    def is_c_contiguous(a):
        return np.isfortran(a.T)

    assert is_c_contiguous(est().fit_transform(X))
    assert is_c_contiguous(est(order="C").fit_transform(X))
    assert np.isfortran(est(order="F").fit_transform(X))

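
# Illustrative sketch, not part of the test suite: the `order` parameter only
# changes the memory layout of the dense output, never its values. The helper
# name `_demo_output_order` is ours, not sklearn API.
def _demo_output_order():
    X = np.arange(10).reshape(5, 2)
    Xt_c = PolynomialFeatures(order="C").fit_transform(X)
    Xt_f = PolynomialFeatures(order="F").fit_transform(X)
    assert Xt_c.flags["C_CONTIGUOUS"] and Xt_f.flags["F_CONTIGUOUS"]
    assert_array_equal(Xt_c, Xt_f)  # same values, different layout
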

@pytest.mark.parametrize(
    "params, err_msg",
    [
        ({"degree": -1}, "degree must be a non-negative integer"),
        ({"degree": 2.5}, "degree must be a non-negative integer"),
        ({"degree": "string"}, "degree must be a non-negative integer"),
        ({"n_knots": 1}, "n_knots must be a positive integer >= 2."),
        ({"n_knots": 2.5}, "n_knots must be a positive integer >= 2."),
        ({"n_knots": "string"}, "n_knots must be a positive integer >= 2."),
        ({"knots": 1}, "Expected 2D array, got scalar array instead:"),
        ({"knots": [1, 2]}, "Expected 2D array, got 1D array instead:"),
        (
            {"knots": [[1]]},
            r"Number of knots, knots.shape\[0\], must be >= 2.",
        ),
        (
            {"knots": [[1, 5], [2, 6]]},
            r"knots.shape\[1\] == n_features is violated.",
        ),
        (
            {"knots": [[1], [1], [2]]},
            "knots must be sorted without duplicates.",
        ),
        ({"knots": [[2], [1]]}, "knots must be sorted without duplicates."),
        (
            {"extrapolation": None},
            "extrapolation must be one of 'error', 'constant', 'linear', "
            "'continue' or 'periodic'.",
        ),
        (
            {"extrapolation": 1},
            "extrapolation must be one of 'error', 'constant', 'linear', "
            "'continue' or 'periodic'.",
        ),
        (
            {"extrapolation": "string"},
            "extrapolation must be one of 'error', 'constant', 'linear', "
            "'continue' or 'periodic'.",
        ),
        ({"include_bias": None}, "include_bias must be bool."),
        ({"include_bias": 1}, "include_bias must be bool."),
        ({"include_bias": "string"}, "include_bias must be bool."),
        (
            {"extrapolation": "periodic", "n_knots": 3, "degree": 3},
            "Periodic splines require degree < n_knots. Got n_knots=3 and degree=3.",
        ),
        (
            {"extrapolation": "periodic", "knots": [[0], [1]], "degree": 2},
            "Periodic splines require degree < n_knots. Got n_knots=2 and degree=2.",
        ),
    ],
)
def test_spline_transformer_input_validation(params, err_msg):
    """Test that we raise errors for invalid input in SplineTransformer."""
    X = [[1], [2]]

    with pytest.raises(ValueError, match=err_msg):
        SplineTransformer(**params).fit(X)


def test_spline_transformer_manual_knot_input():
    """
    Test that array-like knot positions in SplineTransformer are accepted.
    """
    X = np.arange(20).reshape(10, 2)
    knots = [[0.5, 1], [1.5, 2], [5, 10]]
    st1 = SplineTransformer(degree=3, knots=knots, n_knots=None).fit(X)
    knots = np.asarray(knots)
    st2 = SplineTransformer(degree=3, knots=knots, n_knots=None).fit(X)
    for i in range(X.shape[1]):
        assert_allclose(st1.bsplines_[i].t, st2.bsplines_[i].t)


@pytest.mark.parametrize("extrapolation", ["continue", "periodic"])
def test_spline_transformer_integer_knots(extrapolation):
    """Test that SplineTransformer accepts integer valued knot positions."""
    X = np.arange(20).reshape(10, 2)
    knots = [[0, 1], [1, 2], [5, 5], [11, 10], [12, 11]]
    _ = SplineTransformer(
        degree=3, knots=knots, extrapolation=extrapolation
    ).fit_transform(X)


# TODO: Remove in 1.2 when get_feature_names is removed.
@pytest.mark.filterwarnings("ignore::FutureWarning:sklearn")
@pytest.mark.parametrize("get_names", ["get_feature_names", "get_feature_names_out"])
def test_spline_transformer_feature_names(get_names):
    """Test that SplineTransformer generates correct feature names."""
    X = np.arange(20).reshape(10, 2)
    splt = SplineTransformer(n_knots=3, degree=3, include_bias=True).fit(X)
    feature_names = getattr(splt, get_names)()
    assert_array_equal(
        feature_names,
        [
            "x0_sp_0",
            "x0_sp_1",
            "x0_sp_2",
            "x0_sp_3",
            "x0_sp_4",
            "x1_sp_0",
            "x1_sp_1",
            "x1_sp_2",
            "x1_sp_3",
            "x1_sp_4",
        ],
    )

    splt = SplineTransformer(n_knots=3, degree=3, include_bias=False).fit(X)
    feature_names = getattr(splt, get_names)(["a", "b"])
    assert_array_equal(
        feature_names,
        [
            "a_sp_0",
            "a_sp_1",
            "a_sp_2",
            "a_sp_3",
            "b_sp_0",
            "b_sp_1",
            "b_sp_2",
            "b_sp_3",
        ],
    )


@pytest.mark.parametrize("degree", range(1, 5))
@pytest.mark.parametrize("n_knots", range(3, 5))
@pytest.mark.parametrize("knots", ["uniform", "quantile"])
@pytest.mark.parametrize("extrapolation", ["constant", "periodic"])
def test_spline_transformer_unity_decomposition(degree, n_knots, knots, extrapolation):
    """Test that B-splines are indeed a decomposition of unity.

    Spline basis functions must sum to 1 per row as long as we stay within
    the boundaries.
    """
    X = np.linspace(0, 1, 100)[:, None]
    # Make sure the boundaries 0 and 1 are part of X_train.
    X_train = np.r_[[[0]], X[::2, :], [[1]]]
    X_test = X[1::2, :]

    if extrapolation == "periodic":
        n_knots = n_knots + degree  # periodic splines require degree < n_knots

    splt = SplineTransformer(
        n_knots=n_knots,
        degree=degree,
        knots=knots,
        include_bias=True,
        extrapolation=extrapolation,
    )
    splt.fit(X_train)
    for X in [X_train, X_test]:
        assert_allclose(np.sum(splt.transform(X), axis=1), 1)

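
# Illustrative sketch, not part of the test suite: within the training range,
# each row of the spline basis sums to exactly 1 when include_bias=True. The
# helper name is ours, not sklearn API.
def _demo_partition_of_unity():
    X = np.linspace(0, 1, 5)[:, None]
    Xt = SplineTransformer(n_knots=3, degree=2, include_bias=True).fit_transform(X)
    assert_allclose(np.sum(Xt, axis=1), 1.0)
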

@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)])
def test_spline_transformer_linear_regression(bias, intercept):
    """Test that B-splines fit a sinusoidal curve pretty well."""
    X = np.linspace(0, 10, 100)[:, None]
    y = np.sin(X[:, 0]) + 2  # +2 to avoid the value 0 in assert_allclose
    pipe = Pipeline(
        steps=[
            (
                "spline",
                SplineTransformer(
                    n_knots=15,
                    degree=3,
                    include_bias=bias,
                    extrapolation="constant",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, y)
    assert_allclose(pipe.predict(X), y, rtol=1e-3)


@pytest.mark.parametrize(
    ["knots", "n_knots", "sample_weight", "expected_knots"],
    [
        ("uniform", 3, None, np.array([[0, 2], [3, 8], [6, 14]])),
        (
            "uniform",
            3,
            np.array([0, 0, 1, 1, 0, 3, 1]),
            np.array([[2, 2], [4, 8], [6, 14]]),
        ),
        ("uniform", 4, None, np.array([[0, 2], [2, 6], [4, 10], [6, 14]])),
        ("quantile", 3, None, np.array([[0, 2], [3, 3], [6, 14]])),
        (
            "quantile",
            3,
            np.array([0, 0, 1, 1, 0, 3, 1]),
            np.array([[2, 2], [5, 8], [6, 14]]),
        ),
    ],
)
def test_spline_transformer_get_base_knot_positions(
    knots, n_knots, sample_weight, expected_knots
):
    # Check the computation of the knot positions with and without
    # `sample_weight`.
    X = np.array([[0, 2], [0, 2], [2, 2], [3, 3], [4, 6], [5, 8], [6, 14]])
    base_knots = SplineTransformer._get_base_knot_positions(
        X=X, knots=knots, n_knots=n_knots, sample_weight=sample_weight
    )
    assert_allclose(base_knots, expected_knots)

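
# Illustrative sketch, not part of the test suite: with knots="uniform" and no
# sample_weight, the base knots are expected to be evenly spaced between the
# per-feature minimum and maximum. `_get_base_knot_positions` is private API;
# the helper name is ours.
def _demo_uniform_knots():
    X = np.array([[0.0], [1.0], [10.0]])
    knots = SplineTransformer._get_base_knot_positions(
        X=X, knots="uniform", n_knots=3, sample_weight=None
    )
    assert_allclose(knots, [[0.0], [5.0], [10.0]])
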

@pytest.mark.parametrize(
    "knots, n_knots, degree",
    [
        ("uniform", 5, 3),
        ("uniform", 12, 8),
        (
            [[-1.0, 0.0], [0, 1.0], [0.1, 2.0], [0.2, 3.0], [0.3, 4.0], [1, 5.0]],
            None,
            3,
        ),
    ],
)
def test_spline_transformer_periodicity_of_extrapolation(knots, n_knots, degree):
    """Test that the SplineTransformer is periodic for multiple features."""
    X_1 = linspace((-1, 0), (1, 5), 10)
    X_2 = linspace((1, 5), (3, 10), 10)

    splt = SplineTransformer(
        knots=knots, n_knots=n_knots, degree=degree, extrapolation="periodic"
    )
    splt.fit(X_1)

    assert_allclose(splt.transform(X_1), splt.transform(X_2))

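
# Illustrative sketch, not part of the test suite: with extrapolation="periodic",
# the transformation repeats with period equal to the distance between the
# first and last knot (here 1.0). The helper name is ours, not sklearn API.
def _demo_periodic_period():
    splt = SplineTransformer(knots=[[0.0], [1.0]], degree=1, extrapolation="periodic")
    splt.fit(np.array([[0.0], [1.0]]))
    assert_allclose(splt.transform([[0.25]]), splt.transform([[1.25]]))
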

@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)])
def test_spline_transformer_periodic_linear_regression(bias, intercept):
    """Test that B-splines fit a periodic curve pretty well."""
    # "+ 3" to avoid the value 0 in assert_allclose
    def f(x):
        return np.sin(2 * np.pi * x) - np.sin(8 * np.pi * x) + 3

    X = np.linspace(0, 1, 101)[:, None]
    pipe = Pipeline(
        steps=[
            (
                "spline",
                SplineTransformer(
                    n_knots=20,
                    degree=3,
                    include_bias=bias,
                    extrapolation="periodic",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, f(X[:, 0]))

    # Generate larger array to check periodic extrapolation
    X_ = np.linspace(-1, 2, 301)[:, None]
    predictions = pipe.predict(X_)
    assert_allclose(predictions, f(X_[:, 0]), atol=0.01, rtol=0.01)
    assert_allclose(predictions[0:100], predictions[100:200], rtol=1e-3)


@pytest.mark.skipif(
    sp_version < parse_version("1.0.0"),
    reason="Periodic extrapolation not yet implemented for BSpline.",
)
def test_spline_transformer_periodic_spline_backport():
    """Test that the backport of extrapolate="periodic" works correctly."""
    X = np.linspace(-2, 3.5, 10)[:, None]
    degree = 2

    # Use periodic extrapolation backport in SplineTransformer
    transformer = SplineTransformer(
        degree=degree, extrapolation="periodic", knots=[[-1.0], [0.0], [1.0]]
    )
    Xt = transformer.fit_transform(X)

    # Use periodic extrapolation in BSpline
    coef = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]])
    spl = BSpline(np.arange(-3, 4), coef, degree, "periodic")
    Xspl = spl(X[:, 0])
    assert_allclose(Xt, Xspl)


def test_spline_transformer_periodic_splines_periodicity():
    """
    Test that shifted knots result in the same transformation up to permutation.
    """
    X = np.linspace(0, 10, 101)[:, None]

    transformer_1 = SplineTransformer(
        degree=3,
        extrapolation="periodic",
        knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]],
    )

    transformer_2 = SplineTransformer(
        degree=3,
        extrapolation="periodic",
        knots=[[1.0], [3.0], [4.0], [5.0], [8.0], [9.0]],
    )

    Xt_1 = transformer_1.fit_transform(X)
    Xt_2 = transformer_2.fit_transform(X)

    assert_allclose(Xt_1, Xt_2[:, [4, 0, 1, 2, 3]])


@pytest.mark.parametrize("degree", [3, 5])
def test_spline_transformer_periodic_splines_smoothness(degree):
    """Test that spline transformation is smooth at the first / last knot."""
    X = np.linspace(-2, 10, 10_000)[:, None]

    transformer = SplineTransformer(
        degree=degree,
        extrapolation="periodic",
        knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]],
    )
    Xt = transformer.fit_transform(X)

    delta = (X.max() - X.min()) / len(X)
    tol = 10 * delta

    dXt = Xt
    # We expect splines of degree `degree` to be (`degree`-1) times
    # continuously differentiable. I.e. for d = 0, ..., `degree` - 1 the d-th
    # derivative should be continuous. This is the case if the (d+1)-th
    # numerical derivative is reasonably small (smaller than `tol` in absolute
    # value). We thus compute d-th numeric derivatives for d = 1, ..., `degree`
    # and compare them to `tol`.
    #
    # Note that the 0-th derivative is the function itself, such that we are
    # also checking its continuity.
    for d in range(1, degree + 1):
        # Check continuity of the (d-1)-th derivative
        diff = np.diff(dXt, axis=0)
        assert np.abs(diff).max() < tol
        # Compute d-th numeric derivative
        dXt = diff / delta

    # As degree `degree` splines are not `degree` times continuously
    # differentiable at the knots, the `degree + 1`-th numeric derivative
    # should have spikes at the knots.
    diff = np.diff(dXt, axis=0)
    assert np.abs(diff).max() > 1


@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)])
@pytest.mark.parametrize("degree", [1, 2, 3, 4, 5])
def test_spline_transformer_extrapolation(bias, intercept, degree):
    """Test that B-spline extrapolation works correctly."""
    # We use a straight line, for which the extrapolation behavior is easy to
    # predict.
    X = np.linspace(-1, 1, 100)[:, None]
    y = X.squeeze()

    # 'constant'
    pipe = Pipeline(
        [
            (
                "spline",
                SplineTransformer(
                    n_knots=4,
                    degree=degree,
                    include_bias=bias,
                    extrapolation="constant",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, y)
    assert_allclose(pipe.predict([[-10], [5]]), [-1, 1])

    # 'linear'
    pipe = Pipeline(
        [
            (
                "spline",
                SplineTransformer(
                    n_knots=4,
                    degree=degree,
                    include_bias=bias,
                    extrapolation="linear",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, y)
    assert_allclose(pipe.predict([[-10], [5]]), [-10, 5])

    # 'error'
    splt = SplineTransformer(
        n_knots=4, degree=degree, include_bias=bias, extrapolation="error"
    )
    splt.fit(X)
    with pytest.raises(ValueError):
        splt.transform([[-10]])
    with pytest.raises(ValueError):
        splt.transform([[5]])

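
# Illustrative sketch, not part of the test suite: with extrapolation="constant",
# the basis values outside the training range equal those at the nearest
# boundary. The helper name is ours, not sklearn API.
def _demo_constant_extrapolation():
    splt = SplineTransformer(n_knots=3, degree=1, extrapolation="constant")
    splt.fit(np.linspace(0, 1, 10)[:, None])
    assert_allclose(splt.transform([[-5.0]]), splt.transform([[0.0]]))
    assert_allclose(splt.transform([[5.0]]), splt.transform([[1.0]]))
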

def test_spline_transformer_kbindiscretizer():
    """Test that a B-spline of degree=0 is equivalent to KBinsDiscretizer."""
    rng = np.random.RandomState(97531)
    X = rng.randn(200).reshape(200, 1)
    n_bins = 5
    n_knots = n_bins + 1

    splt = SplineTransformer(
        n_knots=n_knots, degree=0, knots="quantile", include_bias=True
    )
    splines = splt.fit_transform(X)

    kbd = KBinsDiscretizer(n_bins=n_bins, encode="onehot-dense", strategy="quantile")
    kbins = kbd.fit_transform(X)

    # Though they should be exactly equal, we test approximately with high
    # accuracy.
    assert_allclose(splines, kbins, rtol=1e-13)


@pytest.mark.parametrize("n_knots", [5, 10])
@pytest.mark.parametrize("include_bias", [True, False])
@pytest.mark.parametrize("degree", [3, 5])
def test_spline_transformer_n_features_out(n_knots, include_bias, degree):
    """Test that transform results in n_features_out_ features."""
    splt = SplineTransformer(n_knots=n_knots, degree=degree, include_bias=include_bias)
    X = np.linspace(0, 1, 10)[:, None]
    splt.fit(X)

    assert splt.transform(X).shape[1] == splt.n_features_out_


@pytest.mark.parametrize(
    "params, err_msg",
    [
        ({"degree": -1}, "degree must be a non-negative integer"),
        ({"degree": 2.5}, "degree must be a non-negative int or tuple"),
        ({"degree": "12"}, r"degree=\(min_degree, max_degree\) must"),
        ({"degree": "string"}, "degree must be a non-negative int or tuple"),
        ({"degree": (-1, 2)}, r"degree=\(min_degree, max_degree\) must"),
        ({"degree": (0, 1.5)}, r"degree=\(min_degree, max_degree\) must"),
        ({"degree": (3, 2)}, r"degree=\(min_degree, max_degree\) must"),
    ],
)
def test_polynomial_features_input_validation(params, err_msg):
    """Test that we raise errors for invalid input in PolynomialFeatures."""
    X = [[1], [2]]

    with pytest.raises(ValueError, match=err_msg):
        PolynomialFeatures(**params).fit(X)


@pytest.fixture()
def single_feature_degree3():
    X = np.arange(6)[:, np.newaxis]
    P = np.hstack([np.ones_like(X), X, X ** 2, X ** 3])
    return X, P


@pytest.mark.parametrize(
    "degree, include_bias, interaction_only, indices",
    [
        (3, True, False, slice(None, None)),
        (3, False, False, slice(1, None)),
        (3, True, True, [0, 1]),
        (3, False, True, [1]),
        ((2, 3), True, False, [0, 2, 3]),
        ((2, 3), False, False, [2, 3]),
        ((2, 3), True, True, [0]),
        ((2, 3), False, True, []),
    ],
)
@pytest.mark.parametrize(
    "sparse_X",
    [False, sparse.csr_matrix, sparse.csc_matrix],
)
def test_polynomial_features_one_feature(
    single_feature_degree3,
    degree,
    include_bias,
    interaction_only,
    indices,
    sparse_X,
):
    """Test PolynomialFeatures on a single feature up to degree 3."""
    X, P = single_feature_degree3
    if sparse_X:
        X = sparse_X(X)
    tf = PolynomialFeatures(
        degree=degree, include_bias=include_bias, interaction_only=interaction_only
    ).fit(X)
    out = tf.transform(X)
    if sparse_X:
        out = out.toarray()
    assert_allclose(out, P[:, indices])
    if tf.n_output_features_ > 0:
        assert tf.powers_.shape == (tf.n_output_features_, tf.n_features_in_)

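
# Illustrative sketch, not part of the test suite: `powers_` stores one row per
# output feature, giving the exponent of each input feature. The helper name is
# ours, not sklearn API.
def _demo_powers():
    poly = PolynomialFeatures(degree=2, include_bias=False).fit(np.zeros((1, 2)))
    # Output columns are x0, x1, x0^2, x0 x1, x1^2.
    assert_array_equal(poly.powers_, [[1, 0], [0, 1], [2, 0], [1, 1], [0, 2]])
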

@pytest.fixture()
def two_features_degree3():
    X = np.arange(6).reshape((3, 2))
    x1 = X[:, :1]
    x2 = X[:, 1:]
    P = np.hstack(
        [
            x1 ** 0 * x2 ** 0,  # 0
            x1 ** 1 * x2 ** 0,  # 1
            x1 ** 0 * x2 ** 1,  # 2
            x1 ** 2 * x2 ** 0,  # 3
            x1 ** 1 * x2 ** 1,  # 4
            x1 ** 0 * x2 ** 2,  # 5
            x1 ** 3 * x2 ** 0,  # 6
            x1 ** 2 * x2 ** 1,  # 7
            x1 ** 1 * x2 ** 2,  # 8
            x1 ** 0 * x2 ** 3,  # 9
        ]
    )
    return X, P


@pytest.mark.parametrize(
    "degree, include_bias, interaction_only, indices",
    [
        (2, True, False, slice(0, 6)),
        (2, False, False, slice(1, 6)),
        (2, True, True, [0, 1, 2, 4]),
        (2, False, True, [1, 2, 4]),
        ((2, 2), True, False, [0, 3, 4, 5]),
        ((2, 2), False, False, [3, 4, 5]),
        ((2, 2), True, True, [0, 4]),
        ((2, 2), False, True, [4]),
        (3, True, False, slice(None, None)),
        (3, False, False, slice(1, None)),
        (3, True, True, [0, 1, 2, 4]),
        (3, False, True, [1, 2, 4]),
        ((2, 3), True, False, [0, 3, 4, 5, 6, 7, 8, 9]),
        ((2, 3), False, False, slice(3, None)),
        ((2, 3), True, True, [0, 4]),
        ((2, 3), False, True, [4]),
        ((3, 3), True, False, [0, 6, 7, 8, 9]),
        ((3, 3), False, False, [6, 7, 8, 9]),
        ((3, 3), True, True, [0]),
        ((3, 3), False, True, []),  # would need 3 input features
    ],
)
@pytest.mark.parametrize(
    "sparse_X",
    [False, sparse.csr_matrix, sparse.csc_matrix],
)
def test_polynomial_features_two_features(
    two_features_degree3,
    degree,
    include_bias,
    interaction_only,
    indices,
    sparse_X,
):
    """Test PolynomialFeatures on 2 features up to degree 3."""
    X, P = two_features_degree3
    if sparse_X:
        X = sparse_X(X)
    tf = PolynomialFeatures(
        degree=degree, include_bias=include_bias, interaction_only=interaction_only
    ).fit(X)
    out = tf.transform(X)
    if sparse_X:
        out = out.toarray()
    assert_allclose(out, P[:, indices])
    if tf.n_output_features_ > 0:
        assert tf.powers_.shape == (tf.n_output_features_, tf.n_features_in_)


# TODO: Remove in 1.2 when get_feature_names is removed.
@pytest.mark.filterwarnings("ignore::FutureWarning:sklearn")
@pytest.mark.parametrize("get_names", ["get_feature_names", "get_feature_names_out"])
def test_polynomial_feature_names(get_names):
    X = np.arange(30).reshape(10, 3)
    poly = PolynomialFeatures(degree=2, include_bias=True).fit(X)
    feature_names = getattr(poly, get_names)()
    assert_array_equal(
        ["1", "x0", "x1", "x2", "x0^2", "x0 x1", "x0 x2", "x1^2", "x1 x2", "x2^2"],
        feature_names,
    )
    assert len(feature_names) == poly.transform(X).shape[1]

    poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
    feature_names = getattr(poly, get_names)(["a", "b", "c"])
    assert_array_equal(
        [
            "a",
            "b",
            "c",
            "a^2",
            "a b",
            "a c",
            "b^2",
            "b c",
            "c^2",
            "a^3",
            "a^2 b",
            "a^2 c",
            "a b^2",
            "a b c",
            "a c^2",
            "b^3",
            "b^2 c",
            "b c^2",
            "c^3",
        ],
        feature_names,
    )
    assert len(feature_names) == poly.transform(X).shape[1]

    poly = PolynomialFeatures(degree=(2, 3), include_bias=False).fit(X)
    feature_names = getattr(poly, get_names)(["a", "b", "c"])
    assert_array_equal(
        [
            "a^2",
            "a b",
            "a c",
            "b^2",
            "b c",
            "c^2",
            "a^3",
            "a^2 b",
            "a^2 c",
            "a b^2",
            "a b c",
            "a c^2",
            "b^3",
            "b^2 c",
            "b c^2",
            "c^3",
        ],
        feature_names,
    )
    assert len(feature_names) == poly.transform(X).shape[1]

    poly = PolynomialFeatures(
        degree=(3, 3), include_bias=True, interaction_only=True
    ).fit(X)
    feature_names = getattr(poly, get_names)(["a", "b", "c"])
    assert_array_equal(["1", "a b c"], feature_names)
    assert len(feature_names) == poly.transform(X).shape[1]

    # Test some unicode feature names.
    poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
    feature_names = getattr(poly, get_names)(["\u0001F40D", "\u262E", "\u05D0"])
    assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"], feature_names)

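
# Illustrative sketch, not part of the test suite: degree=(min_degree, max_degree)
# keeps only the terms whose total degree lies in that closed interval. The
# helper name is ours, not sklearn API.
def _demo_degree_range_names():
    X = np.arange(6).reshape(3, 2)
    poly = PolynomialFeatures(degree=(2, 2), include_bias=False).fit(X)
    assert_array_equal(poly.get_feature_names_out(), ["x0^2", "x0 x1", "x1^2"])
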

@pytest.mark.parametrize(
    ["deg", "include_bias", "interaction_only", "dtype"],
    [
        (1, True, False, int),
        (2, True, False, int),
        (2, True, False, np.float32),
        (2, True, False, np.float64),
        (3, False, False, np.float64),
        (3, False, True, np.float64),
        (4, False, False, np.float64),
        (4, False, True, np.float64),
    ],
)
def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype):
    rng = np.random.RandomState(0)
    X = rng.randint(0, 2, (100, 2))
    X_csc = sparse.csc_matrix(X)

    est = PolynomialFeatures(
        deg, include_bias=include_bias, interaction_only=interaction_only
    )
    Xt_csc = est.fit_transform(X_csc.astype(dtype))
    Xt_dense = est.fit_transform(X.astype(dtype))

    assert isinstance(Xt_csc, sparse.csc_matrix)
    assert Xt_csc.dtype == Xt_dense.dtype
    assert_array_almost_equal(Xt_csc.A, Xt_dense)


@pytest.mark.parametrize(
    ["deg", "include_bias", "interaction_only", "dtype"],
    [
        (1, True, False, int),
        (2, True, False, int),
        (2, True, False, np.float32),
        (2, True, False, np.float64),
        (3, False, False, np.float64),
        (3, False, True, np.float64),
    ],
)
def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
    rng = np.random.RandomState(0)
    X = rng.randint(0, 2, (100, 2))
    X_csr = sparse.csr_matrix(X)

    est = PolynomialFeatures(
        deg, include_bias=include_bias, interaction_only=interaction_only
    )
    Xt_csr = est.fit_transform(X_csr.astype(dtype))
    Xt_dense = est.fit_transform(X.astype(dtype, copy=False))

    assert isinstance(Xt_csr, sparse.csr_matrix)
    assert Xt_csr.dtype == Xt_dense.dtype
    assert_array_almost_equal(Xt_csr.A, Xt_dense)


@pytest.mark.parametrize("n_features", [1, 4, 5])
@pytest.mark.parametrize(
    "min_degree, max_degree", [(0, 1), (0, 2), (1, 3), (0, 4), (3, 4)]
)
@pytest.mark.parametrize("interaction_only", [True, False])
@pytest.mark.parametrize("include_bias", [True, False])
def test_num_combinations(
    n_features,
    min_degree,
    max_degree,
    interaction_only,
    include_bias,
):
    """
    Test that n_output_features_ is calculated correctly.
    """
    x = sparse.csr_matrix(([1], ([0], [n_features - 1])))
    est = PolynomialFeatures(
        degree=max_degree,
        interaction_only=interaction_only,
        include_bias=include_bias,
    )
    est.fit(x)
    num_combos = est.n_output_features_

    combos = PolynomialFeatures._combinations(
        n_features=n_features,
        min_degree=0,
        max_degree=max_degree,
        interaction_only=interaction_only,
        include_bias=include_bias,
    )
    assert num_combos == sum(1 for _ in combos)

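
# Illustrative sketch, not part of the test suite: without interaction_only,
# the number of monomials of total degree <= d in n features (bias included)
# is the binomial coefficient C(n + d, d). The helper name is ours.
def _demo_num_output_features():
    from scipy.special import comb

    n, d = 3, 2
    est = PolynomialFeatures(degree=d, include_bias=True).fit(np.zeros((1, n)))
    assert est.n_output_features_ == comb(n + d, d, exact=True)  # 10 for n=3, d=2
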

@pytest.mark.parametrize(
    ["deg", "include_bias", "interaction_only", "dtype"],
    [
        (2, True, False, np.float32),
        (2, True, False, np.float64),
        (3, False, False, np.float64),
        (3, False, True, np.float64),
    ],
)
def test_polynomial_features_csr_X_floats(deg, include_bias, interaction_only, dtype):
    X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
    X = X_csr.toarray()

    est = PolynomialFeatures(
        deg, include_bias=include_bias, interaction_only=interaction_only
    )
    Xt_csr = est.fit_transform(X_csr.astype(dtype))
    Xt_dense = est.fit_transform(X.astype(dtype))

    assert isinstance(Xt_csr, sparse.csr_matrix)
    assert Xt_csr.dtype == Xt_dense.dtype
    assert_array_almost_equal(Xt_csr.A, Xt_dense)


@pytest.mark.parametrize(
    ["zero_row_index", "deg", "interaction_only"],
    [
        (0, 2, True),
        (1, 2, True),
        (2, 2, True),
        (0, 3, True),
        (1, 3, True),
        (2, 3, True),
        (0, 2, False),
        (1, 2, False),
        (2, 2, False),
        (0, 3, False),
        (1, 3, False),
        (2, 3, False),
    ],
)
def test_polynomial_features_csr_X_zero_row(zero_row_index, deg, interaction_only):
    X_csr = sparse_random(3, 10, 1.0, random_state=0).tocsr()
    X_csr[zero_row_index, :] = 0.0
    X = X_csr.toarray()

    est = PolynomialFeatures(deg, include_bias=False, interaction_only=interaction_only)
    Xt_csr = est.fit_transform(X_csr)
    Xt_dense = est.fit_transform(X)

    assert isinstance(Xt_csr, sparse.csr_matrix)
    assert Xt_csr.dtype == Xt_dense.dtype
    assert_array_almost_equal(Xt_csr.A, Xt_dense)


# This degree should always be one more than the highest degree supported by
# _csr_expansion.
@pytest.mark.parametrize(
    ["include_bias", "interaction_only"],
    [(True, True), (True, False), (False, True), (False, False)],
)
def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only):
    X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
    X = X_csr.toarray()

    est = PolynomialFeatures(
        4, include_bias=include_bias, interaction_only=interaction_only
    )
    Xt_csr = est.fit_transform(X_csr)
    Xt_dense = est.fit_transform(X)

    assert isinstance(Xt_csr, sparse.csr_matrix)
    assert Xt_csr.dtype == Xt_dense.dtype
    assert_array_almost_equal(Xt_csr.A, Xt_dense)


@pytest.mark.parametrize(
    ["deg", "dim", "interaction_only"],
    [
        (2, 1, True),
        (2, 2, True),
        (3, 1, True),
        (3, 2, True),
        (3, 3, True),
        (2, 1, False),
        (2, 2, False),
        (3, 1, False),
        (3, 2, False),
        (3, 3, False),
    ],
)
def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only):
    X_csr = sparse_random(1000, dim, 0.5, random_state=0).tocsr()
    X = X_csr.toarray()

    est = PolynomialFeatures(deg, interaction_only=interaction_only)
    Xt_csr = est.fit_transform(X_csr)
    Xt_dense = est.fit_transform(X)

    assert isinstance(Xt_csr, sparse.csr_matrix)
    assert Xt_csr.dtype == Xt_dense.dtype
    assert_array_almost_equal(Xt_csr.A, Xt_dense)


def test_polynomial_features_deprecated_n_input_features():
    # check that we raise a deprecation warning when accessing
    # `n_input_features_`. FIXME: remove in 1.2
    depr_msg = (
        "The attribute `n_input_features_` was deprecated in version "
        "1.0 and will be removed in 1.2."
    )
    X = np.arange(10).reshape(5, 2)

    with pytest.warns(FutureWarning, match=depr_msg):
        PolynomialFeatures().fit(X).n_input_features_


# TODO: Remove in 1.2 when get_feature_names is removed.
@pytest.mark.parametrize("Transformer", [SplineTransformer, PolynomialFeatures])
def test_get_feature_names_deprecated(Transformer):
    X = np.arange(30).reshape(10, 3)
    poly = Transformer().fit(X)
    msg = "get_feature_names is deprecated in 1.0"
    with pytest.warns(FutureWarning, match=msg):
        poly.get_feature_names()