1"""
2This file contains preprocessing tools based on polynomials.
3"""
4import collections
5import numbers
6from itertools import chain, combinations
7from itertools import combinations_with_replacement as combinations_w_r
8
9import numpy as np
10from scipy import sparse
11from scipy.interpolate import BSpline
12from scipy.special import comb
13
14from ..base import BaseEstimator, TransformerMixin
15from ..utils import check_array
16from ..utils.deprecation import deprecated
17from ..utils.fixes import linspace
18from ..utils.validation import check_is_fitted, FLOAT_DTYPES, _check_sample_weight
19from ..utils.validation import _check_feature_names_in
20from ..utils.stats import _weighted_percentile
21
22from ._csr_polynomial_expansion import _csr_polynomial_expansion
23
24
25__all__ = [
26    "PolynomialFeatures",
27    "SplineTransformer",
28]
29
30
31class PolynomialFeatures(TransformerMixin, BaseEstimator):
32    """Generate polynomial and interaction features.
33
34    Generate a new feature matrix consisting of all polynomial combinations
35    of the features with degree less than or equal to the specified degree.
36    For example, if an input sample is two dimensional and of the form
37    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
38
39    Read more in the :ref:`User Guide <polynomial_features>`.
40
41    Parameters
42    ----------
43    degree : int or tuple (min_degree, max_degree), default=2
44        If a single int is given, it specifies the maximal degree of the
45        polynomial features. If a tuple `(min_degree, max_degree)` is passed,
46        then `min_degree` is the minimum and `max_degree` is the maximum
47        polynomial degree of the generated features. Note that `min_degree=0`
48        and `min_degree=1` are equivalent as outputting the degree zero term is
49        determined by `include_bias`.
50
51    interaction_only : bool, default=False
52        If `True`, only interaction features are produced: features that are
53        products of at most `degree` *distinct* input features, i.e. terms with
54        power of 2 or higher of the same input feature are excluded:
55
56            - included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.
57            - excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc.
58
59    include_bias : bool, default=True
60        If `True` (default), then include a bias column, the feature in which
61        all polynomial powers are zero (i.e. a column of ones - acts as an
62        intercept term in a linear model).
63
64    order : {'C', 'F'}, default='C'
65        Order of output array in the dense case. `'F'` order is faster to
66        compute, but may slow down subsequent estimators.
67
68        .. versionadded:: 0.21
69
70    Attributes
71    ----------
72    powers_ : ndarray of shape (`n_output_features_`, `n_features_in_`)
73        `powers_[i, j]` is the exponent of the jth input in the ith output.
74
75    n_input_features_ : int
76        The total number of input features.
77
78        .. deprecated:: 1.0
79            This attribute is deprecated in 1.0 and will be removed in 1.2.
80            Refer to `n_features_in_` instead.
81
82    n_features_in_ : int
83        Number of features seen during :term:`fit`.
84
85        .. versionadded:: 0.24
86
87    feature_names_in_ : ndarray of shape (`n_features_in_`,)
88        Names of features seen during :term:`fit`. Defined only when `X`
89        has feature names that are all strings.
90
91        .. versionadded:: 1.0
92
93    n_output_features_ : int
94        The total number of polynomial output features. The number of output
95        features is computed by iterating over all suitably sized combinations
96        of input features.
97
98    See Also
99    --------
100    SplineTransformer : Transformer that generates univariate B-spline bases
101        for features.
102
103    Notes
104    -----
105    Be aware that the number of features in the output array scales
106    polynomially in the number of features of the input array, and
107    exponentially in the degree. High degrees can cause overfitting.
108
109    See :ref:`examples/linear_model/plot_polynomial_interpolation.py
110    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
111
112    Examples
113    --------
114    >>> import numpy as np
115    >>> from sklearn.preprocessing import PolynomialFeatures
116    >>> X = np.arange(6).reshape(3, 2)
117    >>> X
118    array([[0, 1],
119           [2, 3],
120           [4, 5]])
121    >>> poly = PolynomialFeatures(2)
122    >>> poly.fit_transform(X)
123    array([[ 1.,  0.,  1.,  0.,  0.,  1.],
124           [ 1.,  2.,  3.,  4.,  6.,  9.],
125           [ 1.,  4.,  5., 16., 20., 25.]])
126    >>> poly = PolynomialFeatures(interaction_only=True)
127    >>> poly.fit_transform(X)
128    array([[ 1.,  0.,  1.,  0.],
129           [ 1.,  2.,  3.,  6.],
130           [ 1.,  4.,  5., 20.]])
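
    A tuple `degree` restricts the generated terms to the given range of
    degrees; here only the raw degree-2 terms are kept:

    >>> poly = PolynomialFeatures(degree=(2, 2), include_bias=False)
    >>> poly.fit_transform(X)
    array([[ 0.,  0.,  1.],
           [ 4.,  6.,  9.],
           [16., 20., 25.]])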
131    """
132
133    def __init__(
134        self, degree=2, *, interaction_only=False, include_bias=True, order="C"
135    ):
136        self.degree = degree
137        self.interaction_only = interaction_only
138        self.include_bias = include_bias
139        self.order = order
140
141    @staticmethod
142    def _combinations(
143        n_features, min_degree, max_degree, interaction_only, include_bias
144    ):
        """Yield tuples of input-feature indices, one per output feature.

        For example, with `n_features=2`, `max_degree=2` and
        `include_bias=True` this yields (), (0,), (1,), (0, 0), (0, 1), (1, 1).
        """
        comb = combinations if interaction_only else combinations_w_r
        start = max(1, min_degree)
        iter = chain.from_iterable(
            comb(range(n_features), i) for i in range(start, max_degree + 1)
        )
        if include_bias:
            iter = chain(comb(range(n_features), 0), iter)
        return iter

    @staticmethod
    def _num_combinations(
        n_features, min_degree, max_degree, interaction_only, include_bias
    ):
        """Calculate the number of terms in the polynomial expansion.

        This should be equivalent to counting the number of terms returned by
        `_combinations(...)` but much faster.
        """

        if interaction_only:
            combinations = sum(
                [
                    comb(n_features, i, exact=True)
                    for i in range(max(1, min_degree), min(max_degree, n_features) + 1)
                ]
            )
        else:
            combinations = comb(n_features + max_degree, max_degree, exact=True) - 1
            if min_degree > 0:
                d = min_degree - 1
                combinations -= comb(n_features + d, d, exact=True) - 1

        if include_bias:
            combinations += 1

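        # For example, n_features=2, min_degree=0, max_degree=2,
        # interaction_only=False and include_bias=True give
        # comb(2 + 2, 2) - 1 = 5 non-bias terms (x0, x1, x0^2, x0 x1, x1^2)
        # plus the bias column, i.e. 6 output features in total.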
        return combinations

    @property
    def powers_(self):
        """Exponent for each of the inputs in the output."""
        check_is_fitted(self)

        combinations = self._combinations(
            n_features=self.n_features_in_,
            min_degree=self._min_degree,
            max_degree=self._max_degree,
            interaction_only=self.interaction_only,
            include_bias=self.include_bias,
        )
        return np.vstack(
            [np.bincount(c, minlength=self.n_features_in_) for c in combinations]
        )

    @deprecated(
        "get_feature_names is deprecated in 1.0 and will be removed "
        "in 1.2. Please use get_feature_names_out instead."
    )
    def get_feature_names(self, input_features=None):
        """Return feature names for output features.

        Parameters
        ----------
        input_features : list of str of shape (n_features,), default=None
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.

        Returns
        -------
        output_feature_names : list of str of shape (n_output_features,)
            Transformed feature names.
        """
        powers = self.powers_
        if input_features is None:
            input_features = ["x%d" % i for i in range(powers.shape[1])]
        feature_names = []
        for row in powers:
            inds = np.where(row)[0]
            if len(inds):
                name = " ".join(
                    "%s^%d" % (input_features[ind], exp)
                    if exp != 1
                    else input_features[ind]
                    for ind, exp in zip(inds, row[inds])
                )
            else:
                name = "1"
            feature_names.append(name)
        return feature_names

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features is None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
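
        Examples
        --------
        >>> import numpy as np
        >>> from sklearn.preprocessing import PolynomialFeatures
        >>> X = np.arange(6).reshape(3, 2)
        >>> poly = PolynomialFeatures(degree=2).fit(X)
        >>> poly.get_feature_names_out()
        array(['1', 'x0', 'x1', 'x0^2', 'x0 x1', 'x1^2'], dtype=object)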
252        """
253        powers = self.powers_
254        input_features = _check_feature_names_in(self, input_features)
255        feature_names = []
256        for row in powers:
257            inds = np.where(row)[0]
258            if len(inds):
259                name = " ".join(
260                    "%s^%d" % (input_features[ind], exp)
261                    if exp != 1
262                    else input_features[ind]
263                    for ind, exp in zip(inds, row[inds])
264                )
265            else:
266                name = "1"
267            feature_names.append(name)
268        return np.asarray(feature_names, dtype=object)
269
270    def fit(self, X, y=None):
271        """
272        Compute number of output features.
273
274        Parameters
275        ----------
276        X : {array-like, sparse matrix} of shape (n_samples, n_features)
277            The data.
278
279        y : Ignored
280            Not used, present here for API consistency by convention.
281
282        Returns
283        -------
284        self : object
285            Fitted transformer.
286        """
287        _, n_features = self._validate_data(X, accept_sparse=True).shape
288
289        if isinstance(self.degree, numbers.Integral):
290            if self.degree < 0:
291                raise ValueError(
292                    f"degree must be a non-negative integer, got {self.degree}."
293                )
294            self._min_degree = 0
295            self._max_degree = self.degree
296        elif (
297            isinstance(self.degree, collections.abc.Iterable) and len(self.degree) == 2
298        ):
299            self._min_degree, self._max_degree = self.degree
300            if not (
301                isinstance(self._min_degree, numbers.Integral)
302                and isinstance(self._max_degree, numbers.Integral)
303                and self._min_degree >= 0
304                and self._min_degree <= self._max_degree
305            ):
306                raise ValueError(
307                    "degree=(min_degree, max_degree) must "
308                    "be non-negative integers that fulfil "
309                    "min_degree <= max_degree, got "
310                    f"{self.degree}."
311                )
312        else:
313            raise ValueError(
314                "degree must be a non-negative int or tuple "
315                "(min_degree, max_degree), got "
316                f"{self.degree}."
317            )
318
319        self.n_output_features_ = self._num_combinations(
320            n_features=n_features,
321            min_degree=self._min_degree,
322            max_degree=self._max_degree,
323            interaction_only=self.interaction_only,
324            include_bias=self.include_bias,
325        )
        # We also record the number of output features for
        # _min_degree = 0
        self._n_out_full = self._num_combinations(
            n_features=n_features,
            min_degree=0,
            max_degree=self._max_degree,
            interaction_only=self.interaction_only,
            include_bias=self.include_bias,
        )

        return self

    def transform(self, X):
        """Transform data to polynomial features.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data to transform, row by row.

            Prefer CSR over CSC for sparse input (for speed), but CSC is
            required if the degree is 4 or higher. If the degree is less than
            4 and the input format is CSC, it will be converted to CSR, have
            its polynomial features generated, then converted back to CSC.

            If the degree is 2 or 3, the method described in "Leveraging
            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
            Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
            used, which is much faster than the method used on CSC input. For
            this reason, a CSC input will be converted to CSR, and the output
            will be converted back to CSC prior to being returned, hence the
            preference of CSR.

        Returns
        -------
        XP : {ndarray, sparse matrix} of shape (n_samples, NP)
            The matrix of features, where `NP` is the number of polynomial
            features generated from the combination of inputs. If a sparse
            matrix is provided, it will be converted into a sparse
            `csr_matrix`.
        """
        check_is_fitted(self)

        X = self._validate_data(
            X, order="F", dtype=FLOAT_DTYPES, reset=False, accept_sparse=("csr", "csc")
        )

        n_samples, n_features = X.shape

        if sparse.isspmatrix_csr(X):
            if self._max_degree > 3:
                return self.transform(X.tocsc()).tocsr()
            to_stack = []
            if self.include_bias:
                to_stack.append(
                    sparse.csc_matrix(np.ones(shape=(n_samples, 1), dtype=X.dtype))
                )
            if self._min_degree <= 1:
                to_stack.append(X)
            for deg in range(max(2, self._min_degree), self._max_degree + 1):
                Xp_next = _csr_polynomial_expansion(
                    X.data, X.indices, X.indptr, X.shape[1], self.interaction_only, deg
                )
                if Xp_next is None:
                    break
                to_stack.append(Xp_next)
            if len(to_stack) == 0:
                # edge case: deal with empty matrix
                XP = sparse.csr_matrix((n_samples, 0), dtype=X.dtype)
            else:
                XP = sparse.hstack(to_stack, format="csr")
        elif sparse.isspmatrix_csc(X) and self._max_degree < 4:
            return self.transform(X.tocsr()).tocsc()
        elif sparse.isspmatrix(X):
            combinations = self._combinations(
                n_features=n_features,
                min_degree=self._min_degree,
                max_degree=self._max_degree,
                interaction_only=self.interaction_only,
                include_bias=self.include_bias,
            )
            columns = []
            for combi in combinations:
                if combi:
                    out_col = 1
                    for col_idx in combi:
                        out_col = X[:, col_idx].multiply(out_col)
                    columns.append(out_col)
                else:
                    bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
                    columns.append(bias)
            XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
        else:
            # Do as if _min_degree = 0 and cut down array after the
            # computation, i.e. use _n_out_full instead of n_output_features_.
            XP = np.empty(
                shape=(n_samples, self._n_out_full), dtype=X.dtype, order=self.order
            )

            # What follows is a faster implementation of:
            # for i, comb in enumerate(combinations):
            #     XP[:, i] = X[:, comb].prod(1)
            # This implementation uses two optimisations.
            # First one is broadcasting,
            # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
            # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
            # ...
            # multiply (X[:, start:end], X[:, start]) -> ...
            # Second optimisation happens for degrees >= 3.
            # Xi^3 is computed reusing previous computation:
            # Xi^3 = Xi^2 * Xi.

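            # Concretely, for two features [a, b], include_bias=True and
            # degree 2, the columns of XP are laid out as
            # [1, a, b, a^2, a*b, b^2]: the degree-2 block is obtained by
            # multiplying [a, b] by a (-> a^2, a*b) and then [b] by b (-> b^2).
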
            # degree 0 term
            if self.include_bias:
                XP[:, 0] = 1
                current_col = 1
            else:
                current_col = 0

            # degree 1 term
            XP[:, current_col : current_col + n_features] = X
            index = list(range(current_col, current_col + n_features))
            current_col += n_features
            index.append(current_col)

            # loop over degree >= 2 terms
            for _ in range(2, self._max_degree + 1):
                new_index = []
                end = index[-1]
                for feature_idx in range(n_features):
                    start = index[feature_idx]
                    new_index.append(current_col)
                    if self.interaction_only:
                        start += index[feature_idx + 1] - index[feature_idx]
                    next_col = current_col + end - start
                    if next_col <= current_col:
                        break
                    # XP[:, start:end] are terms of degree d - 1
                    # that exclude feature #feature_idx.
                    np.multiply(
                        XP[:, start:end],
                        X[:, feature_idx : feature_idx + 1],
                        out=XP[:, current_col:next_col],
                        casting="no",
                    )
                    current_col = next_col

                new_index.append(current_col)
                index = new_index

            if self._min_degree > 1:
                n_XP, n_Xout = self._n_out_full, self.n_output_features_
                if self.include_bias:
                    Xout = np.empty(
                        shape=(n_samples, n_Xout), dtype=XP.dtype, order=self.order
                    )
                    Xout[:, 0] = 1
                    Xout[:, 1:] = XP[:, n_XP - n_Xout + 1 :]
                else:
                    Xout = XP[:, n_XP - n_Xout :].copy()
                XP = Xout
        return XP

    # TODO: Remove in 1.2
    # mypy error: Decorated property not supported
    @deprecated(  # type: ignore
        "The attribute `n_input_features_` was "
        "deprecated in version 1.0 and will be removed in 1.2."
    )
    @property
    def n_input_features_(self):
        return self.n_features_in_


# TODO:
# - sparse support (either scipy or own cython solution)?
class SplineTransformer(TransformerMixin, BaseEstimator):
    """Generate univariate B-spline bases for features.

    Generate a new feature matrix consisting of
    `n_splines=n_knots + degree - 1` (`n_knots - 1` for
    `extrapolation="periodic"`) spline basis functions
    (B-splines) of polynomial order=`degree` for each feature.

    Read more in the :ref:`User Guide <spline_transformer>`.

    .. versionadded:: 1.0

    Parameters
    ----------
    n_knots : int, default=5
        Number of knots of the splines if `knots` equals one of
        {'uniform', 'quantile'}. Must be at least 2. Ignored if `knots`
        is array-like.

    degree : int, default=3
        The polynomial degree of the spline basis. Must be a non-negative
        integer.

    knots : {'uniform', 'quantile'} or array-like of shape \
        (n_knots, n_features), default='uniform'
        Set knot positions such that first knot <= features <= last knot.

        - If 'uniform', `n_knots` number of knots are distributed uniformly
          from min to max values of the features.
        - If 'quantile', they are distributed uniformly along the quantiles of
          the features.
        - If an array-like is given, it directly specifies the sorted knot
          positions including the boundary knots. Note that, internally,
          `degree` number of knots are added before the first knot, the same
          after the last knot.

    extrapolation : {'error', 'constant', 'linear', 'continue', 'periodic'}, \
        default='constant'
        If 'error', values outside the min and max values of the training
        features raise a `ValueError`. If 'constant', the value of the
        splines at minimum and maximum value of the features is used as
        constant extrapolation. If 'linear', a linear extrapolation is used.
        If 'continue', the splines are extrapolated as is, i.e. option
        `extrapolate=True` in :class:`scipy.interpolate.BSpline`. If
        'periodic', periodic splines with a periodicity equal to the distance
        between the first and last knot are used. Periodic splines enforce
        equal function values and derivatives at the first and last knot.
        For example, this makes it possible to avoid introducing an arbitrary
        jump between Dec 31st and Jan 1st in spline features derived from a
        naturally periodic "day-of-year" input feature. In this case it is
        recommended to manually set the knot values to control the period.

    include_bias : bool, default=True
        If False, then the last spline element inside the data range
        of a feature is dropped. As B-splines sum to one over the spline basis
        functions for each data point, they implicitly include a bias term,
        i.e. a column of ones. It acts as an intercept term in a linear model.

    order : {'C', 'F'}, default='C'
        Order of output array. 'F' order is faster to compute, but may slow
        down subsequent estimators.

    Attributes
    ----------
    bsplines_ : list of shape (n_features,)
        List of BSplines objects, one for each feature.

    n_features_in_ : int
        The total number of input features.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_features_out_ : int
        The total number of output features, which is computed as
        `n_features * n_splines`, where `n_splines` is
        the number of basis elements of the B-splines,
        `n_knots + degree - 1` for non-periodic splines and
        `n_knots - 1` for periodic ones.
        If `include_bias=False`, then it is only
        `n_features * (n_splines - 1)`.

    See Also
    --------
    KBinsDiscretizer : Transformer that bins continuous data into intervals.

    PolynomialFeatures : Transformer that generates polynomial and interaction
        features.

    Notes
    -----
    High degrees and a high number of knots can cause overfitting.

    See :ref:`examples/linear_model/plot_polynomial_interpolation.py
    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.preprocessing import SplineTransformer
    >>> X = np.arange(6).reshape(6, 1)
    >>> spline = SplineTransformer(degree=2, n_knots=3)
    >>> spline.fit_transform(X)
    array([[0.5 , 0.5 , 0.  , 0.  ],
           [0.18, 0.74, 0.08, 0.  ],
           [0.02, 0.66, 0.32, 0.  ],
           [0.  , 0.32, 0.66, 0.02],
           [0.  , 0.08, 0.74, 0.18],
           [0.  , 0.  , 0.5 , 0.5 ]])
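
    The generated feature names follow the pattern `<feature>_sp_<index>`:

    >>> spline.get_feature_names_out()
    array(['x0_sp_0', 'x0_sp_1', 'x0_sp_2', 'x0_sp_3'], dtype=object)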
614    """
615
616    def __init__(
617        self,
618        n_knots=5,
619        degree=3,
620        *,
621        knots="uniform",
622        extrapolation="constant",
623        include_bias=True,
624        order="C",
625    ):
626        self.n_knots = n_knots
627        self.degree = degree
628        self.knots = knots
629        self.extrapolation = extrapolation
630        self.include_bias = include_bias
631        self.order = order
632
633    @staticmethod
634    def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None):
635        """Calculate base knot positions.
636
637        Base knots such that first knot <= feature <= last knot. For the
638        B-spline construction with scipy.interpolate.BSpline, 2*degree knots
639        beyond the base interval are added.
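
        For example, with `knots="uniform"` and `n_knots=3`, a feature
        ranging over [0, 4] gets the base knots [0, 2, 4].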

        Returns
        -------
        knots : ndarray of shape (n_knots, n_features), dtype=np.float64
            Knot positions (points) of base interval.
        """
        if knots == "quantile":
            percentiles = 100 * np.linspace(
                start=0, stop=1, num=n_knots, dtype=np.float64
            )

            if sample_weight is None:
                knots = np.percentile(X, percentiles, axis=0)
            else:
                knots = np.array(
                    [
                        _weighted_percentile(X, sample_weight, percentile)
                        for percentile in percentiles
                    ]
                )

        else:
            # knots == 'uniform':
            # Note that the variable `knots` has already been validated and
            # `else` is therefore safe.
            # Disregard observations with zero weight.
            mask = slice(None, None, 1) if sample_weight is None else sample_weight > 0
            x_min = np.amin(X[mask], axis=0)
            x_max = np.amax(X[mask], axis=0)

            knots = linspace(
                start=x_min,
                stop=x_max,
                num=n_knots,
                endpoint=True,
                dtype=np.float64,
            )

        return knots

    @deprecated(
        "get_feature_names is deprecated in 1.0 and will be removed "
        "in 1.2. Please use get_feature_names_out instead."
    )
    def get_feature_names(self, input_features=None):
        """Return feature names for output features.

        Parameters
        ----------
        input_features : list of str of shape (n_features,), default=None
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.

        Returns
        -------
        output_feature_names : list of str of shape (n_output_features,)
            Transformed feature names.
        """
        n_splines = self.bsplines_[0].c.shape[0]
        if input_features is None:
            input_features = ["x%d" % i for i in range(self.n_features_in_)]
        feature_names = []
        for i in range(self.n_features_in_):
            for j in range(n_splines - 1 + self.include_bias):
                feature_names.append(f"{input_features[i]}_sp_{j}")
        return feature_names

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        n_splines = self.bsplines_[0].c.shape[0]
        input_features = _check_feature_names_in(self, input_features)
        feature_names = []
        for i in range(self.n_features_in_):
            for j in range(n_splines - 1 + self.include_bias):
                feature_names.append(f"{input_features[i]}_sp_{j}")
        return np.asarray(feature_names, dtype=object)

    def fit(self, X, y=None, sample_weight=None):
        """Compute knot positions of splines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data.

        y : None
            Ignored.

        sample_weight : array-like of shape (n_samples,), default = None
            Individual weights for each sample. Used to calculate quantiles if
            `knots="quantile"`. For `knots="uniform"`, zero weighted
            observations are ignored for finding the min and max of `X`.

        Returns
        -------
        self : object
            Fitted transformer.
        """
        X = self._validate_data(
            X,
            reset=True,
            accept_sparse=False,
            ensure_min_samples=2,
            ensure_2d=True,
        )
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        _, n_features = X.shape

        if not (isinstance(self.degree, numbers.Integral) and self.degree >= 0):
            raise ValueError(
                f"degree must be a non-negative integer, got {self.degree}."
            )

        if isinstance(self.knots, str) and self.knots in [
            "uniform",
            "quantile",
        ]:
            if not (isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2):
                raise ValueError(
                    f"n_knots must be a positive integer >= 2, got: {self.n_knots}"
                )

            base_knots = self._get_base_knot_positions(
                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight
            )
        else:
            base_knots = check_array(self.knots, dtype=np.float64)
            if base_knots.shape[0] < 2:
                raise ValueError("Number of knots, knots.shape[0], must be >= 2.")
            elif base_knots.shape[1] != n_features:
                raise ValueError("knots.shape[1] == n_features is violated.")
            elif not np.all(np.diff(base_knots, axis=0) > 0):
                raise ValueError("knots must be sorted without duplicates.")

        if self.extrapolation not in (
            "error",
            "constant",
            "linear",
            "continue",
            "periodic",
        ):
            raise ValueError(
                "extrapolation must be one of 'error', "
                "'constant', 'linear', 'continue' or 'periodic'."
            )

        if not isinstance(self.include_bias, (bool, np.bool_)):
            raise ValueError("include_bias must be bool.")

        # number of knots for base interval
        n_knots = base_knots.shape[0]

        if self.extrapolation == "periodic" and n_knots <= self.degree:
            raise ValueError(
                "Periodic splines require degree < n_knots. Got n_knots="
                f"{n_knots} and degree={self.degree}."
            )

        # number of spline basis functions
        if self.extrapolation != "periodic":
            n_splines = n_knots + self.degree - 1
        else:
            # periodic splines have self.degree less degrees of freedom
            n_splines = n_knots - 1
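        # For instance, n_knots=5 and degree=3 give n_splines = 7 basis
        # functions per feature, but only n_knots - 1 = 4 for
        # extrapolation="periodic".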

        degree = self.degree
        n_out = n_features * n_splines
        # We have to add degree number of knots below, and degree number of
        # knots above the base knots in order to make the spline basis
        # complete.
        if self.extrapolation == "periodic":
            # For periodic splines the spacing of the first / last degree knots
            # needs to be a continuation of the spacing of the last / first
            # base knots.
            period = base_knots[-1] - base_knots[0]
            knots = np.r_[
                base_knots[-(degree + 1) : -1] - period,
                base_knots,
                base_knots[1 : (degree + 1)] + period,
            ]
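            # For a single feature with base_knots = [0, 1, 3] and degree=1,
            # period = 3 and knots = [-2, 0, 1, 3, 4].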

        else:
            # Eilers & Marx in "Flexible smoothing with B-splines and
            # penalties" https://doi.org/10.1214/ss/1038425655 advise
            # against repeating first and last knot several times, which
            # would have inferior behaviour at boundaries if combined with
            # a penalty (hence P-Spline). We follow this advice even if our
            # splines are unpenalized. Meaning we do not:
            # knots = np.r_[
            #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),
            #     base_knots,
            #     np.tile(base_knots.max(axis=0), reps=[degree, 1])
            # ]
            # Instead, we reuse the distance of the 2 first/last knots.
            dist_min = base_knots[1] - base_knots[0]
            dist_max = base_knots[-1] - base_knots[-2]

            knots = np.r_[
                linspace(
                    base_knots[0] - degree * dist_min,
                    base_knots[0] - dist_min,
                    num=degree,
                ),
                base_knots,
                linspace(
                    base_knots[-1] + dist_max,
                    base_knots[-1] + degree * dist_max,
                    num=degree,
                ),
            ]
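            # For a single feature with base_knots = [0, 1, 3] and degree=2,
            # dist_min = 1 and dist_max = 2, giving
            # knots = [-2, -1, 0, 1, 3, 5, 7].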

        # With a diagonal coefficient matrix, we get back the spline basis
        # elements, i.e. the design matrix of the spline.
        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.
        coef = np.eye(n_splines, dtype=np.float64)
        if self.extrapolation == "periodic":
            coef = np.concatenate((coef, coef[:degree, :]))

        extrapolate = self.extrapolation in ["periodic", "continue"]

        bsplines = [
            BSpline.construct_fast(
                knots[:, i], coef, self.degree, extrapolate=extrapolate
            )
            for i in range(n_features)
        ]
        self.bsplines_ = bsplines

        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)
        return self

    def transform(self, X):
        """Transform each feature data to B-splines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to transform.

        Returns
        -------
        XBS : ndarray of shape (n_samples, n_features * n_splines)
            The matrix of features, where n_splines is the number of basis
            elements of the B-splines, n_knots + degree - 1.
902        """
903        check_is_fitted(self)
904
905        X = self._validate_data(X, reset=False, accept_sparse=False, ensure_2d=True)
906
907        n_samples, n_features = X.shape
908        n_splines = self.bsplines_[0].c.shape[1]
909        degree = self.degree
910
911        # Note that scipy BSpline returns float64 arrays and converts input
912        # x=X[:, i] to c-contiguous float64.
913        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)
914        if X.dtype in FLOAT_DTYPES:
915            dtype = X.dtype
916        else:
917            dtype = np.float64
918        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)
919
920        for i in range(n_features):
921            spl = self.bsplines_[i]
922
923            if self.extrapolation in ("continue", "error", "periodic"):
924
925                if self.extrapolation == "periodic":
926                    # With periodic extrapolation we map x to the segment
927                    # [spl.t[k], spl.t[n]].
928                    # This is equivalent to BSpline(.., extrapolate="periodic")
929                    # for scipy>=1.0.0.
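                    # For example, with base interval [t[k], t[n]] = [0, 4],
                    # x = 5 is mapped to 0 + (5 - 0) % 4 = 1.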
                    n = spl.t.size - spl.k - 1
                    # Assign to new array to avoid inplace operation
                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (
                        spl.t[n] - spl.t[spl.k]
                    )
                else:
                    x = X[:, i]

                XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)

            else:
                xmin = spl.t[degree]
                xmax = spl.t[-degree - 1]
                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)
                XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])

            # Note for extrapolation:
            # 'continue' is already returned as is by scipy BSplines
            if self.extrapolation == "error":
                # BSpline with extrapolate=False does not raise an error, but
                # outputs np.nan.
                if np.any(np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])):
                    raise ValueError(
                        "X contains values beyond the limits of the knots."
                    )
            elif self.extrapolation == "constant":
                # Set all values beyond xmin and xmax to the value of the
                # spline basis functions at those two positions.
                # Only the first degree and last degree number of splines
                # have non-zero values at the boundaries.

                # spline values at boundaries
                f_min = spl(xmin)
                f_max = spl(xmax)
                mask = X[:, i] < xmin
                if np.any(mask):
                    XBS[mask, (i * n_splines) : (i * n_splines + degree)] = f_min[
                        :degree
                    ]

                mask = X[:, i] > xmax
                if np.any(mask):
                    XBS[
                        mask,
                        ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),
                    ] = f_max[-degree:]

            elif self.extrapolation == "linear":
                # Continue the degree first and degree last spline bases
                # linearly beyond the boundaries, with slope = derivative at
                # the boundary.
                # Note that all others have derivative = value = 0 at the
                # boundaries.

                # spline values at boundaries
                f_min, f_max = spl(xmin), spl(xmax)
                # spline derivatives = slopes at boundaries
                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)
                # Compute the linear continuation.
                if degree <= 1:
                    # For degree=1, the derivative of 2nd spline is not zero at
                    # boundary. For degree=0 it is the same as 'constant'.
                    degree += 1
                for j in range(degree):
                    mask = X[:, i] < xmin
                    if np.any(mask):
                        XBS[mask, i * n_splines + j] = (
                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]
                        )

                    mask = X[:, i] > xmax
                    if np.any(mask):
                        k = n_splines - 1 - j
                        XBS[mask, i * n_splines + k] = (
                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]
                        )

        if self.include_bias:
            return XBS
        else:
            # We throw away one spline basis per feature.
            # We choose the last one.
            indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]
            return XBS[:, indices]