1"""
2The :mod:`sklearn.pls` module implements Partial Least Squares (PLS).
3"""

# Author: Edouard Duchesnay <edouard.duchesnay@cea.fr>
# License: BSD 3 clause

import warnings
from abc import ABCMeta, abstractmethod

import numpy as np
from scipy.linalg import svd

from ..base import BaseEstimator, RegressorMixin, TransformerMixin
from ..base import MultiOutputMixin
from ..utils import check_array, check_consistent_length
from ..utils.fixes import sp_version
from ..utils.fixes import parse_version
from ..utils.extmath import svd_flip
from ..utils.validation import check_is_fitted, FLOAT_DTYPES
from ..exceptions import ConvergenceWarning
from ..utils.deprecation import deprecated

__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"]


if sp_version >= parse_version("1.7"):
    # Starting in scipy 1.7, pinv2 was deprecated in favor of pinv.
    # pinv now uses the svd to compute the pseudo-inverse.
    from scipy.linalg import pinv as pinv2
else:
    from scipy.linalg import pinv2


def _pinv2_old(a):
    # Mimics the old scipy pinv2 behavior, which was changed in:
    # https://github.com/scipy/scipy/pull/10067
    # We can not set `cond` or `rcond` for pinv2 in scipy >= 1.3 to keep the
    # same behavior of pinv2 for scipy < 1.3, because the condition used to
    # determine the rank is dependent on the output of svd.
    u, s, vh = svd(a, full_matrices=False, check_finite=False)

    t = u.dtype.char.lower()
    factor = {"f": 1e3, "d": 1e6}
    cond = np.max(s) * factor[t] * np.finfo(t).eps
    rank = np.sum(s > cond)

    u = u[:, :rank]
    u /= s[:rank]
    return np.transpose(np.conjugate(np.dot(u, vh[:rank])))
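
# A minimal illustrative check (a sketch, not part of the module's logic): for
# a well-conditioned matrix both rank-cutoff conventions keep the full rank,
# so _pinv2_old should agree with np.linalg.pinv:
#
#   rng = np.random.RandomState(0)
#   a = rng.rand(5, 3)
#   assert np.allclose(_pinv2_old(a), np.linalg.pinv(a))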


def _get_first_singular_vectors_power_method(
    X, Y, mode="A", max_iter=500, tol=1e-06, norm_y_weights=False
):
    """Return the first left and right singular vectors of X'Y.

    Provides an alternative to the svd(X'Y) and uses the power method instead.
    With `norm_y_weights=True` and in mode A, this corresponds to the
    algorithm in section 11.3 of Wegelin's review, except that it starts at
    the "update saliences" part.
    """

    eps = np.finfo(X.dtype).eps
    try:
        y_score = next(col for col in Y.T if np.any(np.abs(col) > eps))
    except StopIteration as e:
        raise StopIteration("Y residual is constant") from e

    x_weights_old = 100  # init to big value for first convergence check

    if mode == "B":
        # Precompute pseudo inverse matrices
        # Basically: X_pinv = (X.T X)^-1 X.T
        # Which requires inverting a (n_features, n_features) matrix.
        # As a result, and as detailed in Wegelin's review, CCA (i.e. mode B)
        # will be unstable if n_features > n_samples or n_targets >
        # n_samples.
        X_pinv, Y_pinv = _pinv2_old(X), _pinv2_old(Y)

    for i in range(max_iter):
        if mode == "B":
            x_weights = np.dot(X_pinv, y_score)
        else:
            x_weights = np.dot(X.T, y_score) / np.dot(y_score, y_score)

        x_weights /= np.sqrt(np.dot(x_weights, x_weights)) + eps
        x_score = np.dot(X, x_weights)

        if mode == "B":
            y_weights = np.dot(Y_pinv, x_score)
        else:
            y_weights = np.dot(Y.T, x_score) / np.dot(x_score.T, x_score)

        if norm_y_weights:
            y_weights /= np.sqrt(np.dot(y_weights, y_weights)) + eps

        y_score = np.dot(Y, y_weights) / (np.dot(y_weights, y_weights) + eps)

        x_weights_diff = x_weights - x_weights_old
        if np.dot(x_weights_diff, x_weights_diff) < tol or Y.shape[1] == 1:
            break
        x_weights_old = x_weights

    n_iter = i + 1
    if n_iter == max_iter:
        warnings.warn("Maximum number of iterations reached", ConvergenceWarning)

    return x_weights, y_weights, n_iter
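
# An illustrative comparison (a sketch, assuming centered inputs and the
# default tolerance): the power method should agree, up to sign and roughly up
# to `tol`, with the first left singular vector from the full-SVD helper
# below:
#
#   rng = np.random.RandomState(0)
#   X, Y = rng.randn(20, 5), rng.randn(20, 3)
#   X -= X.mean(axis=0)
#   Y -= Y.mean(axis=0)
#   u_pm, _, _ = _get_first_singular_vectors_power_method(X, Y)
#   u_svd, _ = _get_first_singular_vectors_svd(X, Y)
#   assert np.allclose(np.abs(u_pm), np.abs(u_svd), atol=1e-2)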


def _get_first_singular_vectors_svd(X, Y):
    """Return the first left and right singular vectors of X'Y.

    Here the whole SVD is computed.
    """
    C = np.dot(X.T, Y)
    U, _, Vt = svd(C, full_matrices=False)
    return U[:, 0], Vt[0, :]


def _center_scale_xy(X, Y, scale=True):
    """Center X and Y, and scale them if the `scale` parameter is True.

    Returns
    -------
        X, Y, x_mean, y_mean, x_std, y_std
    """
    # center
    x_mean = X.mean(axis=0)
    X -= x_mean
    y_mean = Y.mean(axis=0)
    Y -= y_mean
    # scale
    if scale:
        x_std = X.std(axis=0, ddof=1)
        x_std[x_std == 0.0] = 1.0
        X /= x_std
        y_std = Y.std(axis=0, ddof=1)
        y_std[y_std == 0.0] = 1.0
        Y /= y_std
    else:
        x_std = np.ones(X.shape[1])
        y_std = np.ones(Y.shape[1])
    return X, Y, x_mean, y_mean, x_std, y_std
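
# Usage sketch: with scale=True, each column of the returned X has
# (near-)zero mean and unit standard deviation (ddof=1). Note the operation
# is inplace on the inputs:
#
#   rng = np.random.RandomState(0)
#   Xc, Yc, *_ = _center_scale_xy(rng.randn(10, 3), rng.randn(10, 2))
#   assert np.allclose(Xc.mean(axis=0), 0)
#   assert np.allclose(Xc.std(axis=0, ddof=1), 1)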


def _svd_flip_1d(u, v):
    """Same as svd_flip but works on 1d arrays, and is inplace"""
    # svd_flip would force us to convert to 2d array and would also return 2d
    # arrays. We don't want that.
    biggest_abs_val_idx = np.argmax(np.abs(u))
    sign = np.sign(u[biggest_abs_val_idx])
    u *= sign
    v *= sign
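
# Sign-convention sketch: after the flip, the largest-magnitude entry of u is
# positive, which makes the output deterministic across LAPACK drivers:
#
#   u, v = np.array([-2.0, 1.0]), np.array([1.0, 3.0])
#   _svd_flip_1d(u, v)  # modifies u and v inplace
#   assert u[0] == 2.0 and v[0] == -1.0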


class _PLS(
    TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator, metaclass=ABCMeta
):
    """Partial Least Squares (PLS)

    This class implements the generic PLS algorithm.

    Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods,
    with emphasis on the two-block case
    https://www.stat.washington.edu/research/reports/2000/tr371.pdf
    """

    @abstractmethod
    def __init__(
        self,
        n_components=2,
        *,
        scale=True,
        deflation_mode="regression",
        mode="A",
        algorithm="nipals",
        max_iter=500,
        tol=1e-06,
        copy=True,
    ):
        self.n_components = n_components
        self.deflation_mode = deflation_mode
        self.mode = mode
        self.scale = scale
        self.algorithm = algorithm
        self.max_iter = max_iter
        self.tol = tol
        self.copy = copy

    def fit(self, X, Y):
        """Fit model to data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of predictors.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, where `n_samples` is the number of samples and
            `n_targets` is the number of response variables.

        Returns
        -------
        self : object
            Fitted model.
        """

        check_consistent_length(X, Y)
        X = self._validate_data(
            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
        )
        Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)

        n = X.shape[0]
        p = X.shape[1]
        q = Y.shape[1]

        n_components = self.n_components
        if self.deflation_mode == "regression":
            # With PLSRegression n_components is bounded by the rank of (X.T X)
            # see Wegelin page 25
            rank_upper_bound = p
            if not 1 <= n_components <= rank_upper_bound:
                # TODO: raise an error in 1.1
                warnings.warn(
                    f"As of version 0.24, n_components({n_components}) should "
                    "be in [1, n_features]. "
                    f"n_components={rank_upper_bound} will be used instead. "
                    "In version 1.1 (renaming of 0.26), an error will be "
                    "raised.",
                    FutureWarning,
                )
                n_components = rank_upper_bound
        else:
            # With CCA and PLSCanonical, n_components is bounded by the rank of
            # X and the rank of Y: see Wegelin page 12
            rank_upper_bound = min(n, p, q)
            if not 1 <= n_components <= rank_upper_bound:
                # TODO: raise an error in 1.1
                warnings.warn(
                    f"As of version 0.24, n_components({n_components}) should "
                    "be in [1, min(n_features, n_samples, n_targets)] = "
                    f"[1, {rank_upper_bound}]. "
                    f"n_components={rank_upper_bound} will be used instead. "
                    "In version 1.1 (renaming of 0.26), an error will be "
                    "raised.",
                    FutureWarning,
                )
                n_components = rank_upper_bound

        if self.algorithm not in ("svd", "nipals"):
            raise ValueError(
                f"algorithm should be 'svd' or 'nipals', got {self.algorithm}."
            )

        self._norm_y_weights = self.deflation_mode == "canonical"  # 1.1
        norm_y_weights = self._norm_y_weights

        # Scale (in place)
        Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
            X, Y, self.scale
        )

        self.x_weights_ = np.zeros((p, n_components))  # U
        self.y_weights_ = np.zeros((q, n_components))  # V
        self._x_scores = np.zeros((n, n_components))  # Xi
        self._y_scores = np.zeros((n, n_components))  # Omega
        self.x_loadings_ = np.zeros((p, n_components))  # Gamma
        self.y_loadings_ = np.zeros((q, n_components))  # Delta
        self.n_iter_ = []

        # This whole thing corresponds to the algorithm in section 4.1 of the
        # review from Wegelin. See above for a notation mapping from code to
        # paper.
        Y_eps = np.finfo(Yk.dtype).eps
        for k in range(n_components):
            # Find first left and right singular vectors of the X.T.dot(Y)
            # cross-covariance matrix.
            if self.algorithm == "nipals":
                # Replace columns that are all close to zero with zeros
                Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)
                Yk[:, Yk_mask] = 0.0

                try:
                    (
                        x_weights,
                        y_weights,
                        n_iter_,
                    ) = _get_first_singular_vectors_power_method(
                        Xk,
                        Yk,
                        mode=self.mode,
                        max_iter=self.max_iter,
                        tol=self.tol,
                        norm_y_weights=norm_y_weights,
                    )
                except StopIteration as e:
                    if str(e) != "Y residual is constant":
                        raise
                    warnings.warn(f"Y residual is constant at iteration {k}")
                    break

                self.n_iter_.append(n_iter_)

            elif self.algorithm == "svd":
                x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)

            # inplace sign flip for consistency across solvers and archs
            _svd_flip_1d(x_weights, y_weights)

            # compute scores, i.e. the projections of X and Y
            x_scores = np.dot(Xk, x_weights)
            if norm_y_weights:
                y_ss = 1
            else:
                y_ss = np.dot(y_weights, y_weights)
            y_scores = np.dot(Yk, y_weights) / y_ss

            # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1
            x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)
            Xk -= np.outer(x_scores, x_loadings)

            if self.deflation_mode == "canonical":
                # regress Yk on y_score
                y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)
                Yk -= np.outer(y_scores, y_loadings)
            if self.deflation_mode == "regression":
                # regress Yk on x_score
                y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)
                Yk -= np.outer(x_scores, y_loadings)

            self.x_weights_[:, k] = x_weights
            self.y_weights_[:, k] = y_weights
            self._x_scores[:, k] = x_scores
            self._y_scores[:, k] = y_scores
            self.x_loadings_[:, k] = x_loadings
            self.y_loadings_[:, k] = y_loadings

        # X was approximated as Xi . Gamma.T + X_(R+1)
        # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is
        # whatever is left to fully reconstruct X, and can be 0 if X is of rank
        # n_components.
        # Similarly, Y was approximated as Omega . Delta.T + Y_(R+1)
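        # Illustrative consequence (an assumption for intuition, not enforced
        # here): if n_components equals the rank of the scaled X, the
        # deflation leaves nothing behind and np.allclose(Xk, 0) would hold
        # at this point.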

        # Compute transformation matrices (rotations_). See User Guide.
        self.x_rotations_ = np.dot(
            self.x_weights_,
            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),
        )
        self.y_rotations_ = np.dot(
            self.y_weights_,
            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
        )

        self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
        self.coef_ = self.coef_ * self._y_std
        return self

    def transform(self, X, Y=None, copy=True):
        """Apply the dimension reduction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to transform.

        Y : array-like of shape (n_samples, n_targets), default=None
            Target vectors.

        copy : bool, default=True
            Whether to copy `X` and `Y`, or perform in-place normalization.

        Returns
        -------
        x_scores, y_scores : array-like or tuple of array-like
            Return `x_scores` if `Y` is not given, `(x_scores, y_scores)`
            otherwise.
        """
        check_is_fitted(self)
        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
        # Normalize
        X -= self._x_mean
        X /= self._x_std
        # Apply rotation
        x_scores = np.dot(X, self.x_rotations_)
        if Y is not None:
            Y = check_array(Y, ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES)
            if Y.ndim == 1:
                Y = Y.reshape(-1, 1)
            Y -= self._y_mean
            Y /= self._y_std
            y_scores = np.dot(Y, self.y_rotations_)
            return x_scores, y_scores

        return x_scores

    def inverse_transform(self, X):
        """Transform data back to its original space.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data, where `n_samples` is the number of samples
            and `n_components` is the number of pls components.

        Returns
        -------
        X_reconstructed : ndarray of shape (n_samples, n_features)
            Return the reconstructed array.

        Notes
        -----
        This transformation will only be exact if `n_components=n_features`.
        """
        check_is_fitted(self)
        X = check_array(X, dtype=FLOAT_DTYPES)
        # From pls space to original space
        X_reconstructed = np.matmul(X, self.x_loadings_.T)

        # Denormalize
        X_reconstructed *= self._x_std
        X_reconstructed += self._x_mean
        return X_reconstructed
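
    # Round-trip sketch (an illustration, assuming n_components == n_features
    # and a full-rank X): transform followed by inverse_transform recovers X:
    #   est = PLSRegression(n_components=X.shape[1]).fit(X, Y)
    #   assert np.allclose(est.inverse_transform(est.transform(X)), X)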

    def predict(self, X, copy=True):
        """Predict targets of given samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        copy : bool, default=True
            Whether to copy `X` and `Y`, or perform in-place normalization.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,) or (n_samples, n_targets)
            Returns predicted values.

        Notes
        -----
        This call requires the estimation of a matrix of shape
        `(n_features, n_targets)`, which may be an issue in high dimensional
        space.
        """
        check_is_fitted(self)
        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
        # Normalize
        X -= self._x_mean
        X /= self._x_std
        Ypred = np.dot(X, self.coef_)
        return Ypred + self._y_mean

    def fit_transform(self, X, y=None):
        """Learn and apply the dimension reduction on the train data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of predictors.

        y : array-like of shape (n_samples, n_targets), default=None
            Target vectors, where `n_samples` is the number of samples and
            `n_targets` is the number of response variables.

        Returns
        -------
        out : array-like or tuple of array-like
            Return `x_scores` if `y` is not given, `(x_scores, y_scores)`
            otherwise.
        """
        return self.fit(X, y).transform(X, y)

    # mypy error: Decorated property not supported
    @deprecated(  # type: ignore
        "Attribute `norm_y_weights` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def norm_y_weights(self):
        return self._norm_y_weights

    @deprecated(  # type: ignore
        "Attribute `x_mean_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def x_mean_(self):
        return self._x_mean

    @deprecated(  # type: ignore
        "Attribute `y_mean_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def y_mean_(self):
        return self._y_mean

    @deprecated(  # type: ignore
        "Attribute `x_std_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def x_std_(self):
        return self._x_std

    @deprecated(  # type: ignore
        "Attribute `y_std_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def y_std_(self):
        return self._y_std

    @property
    def x_scores_(self):
        """Attribute `x_scores_` was deprecated in version 0.24."""
        # TODO: raise error in 1.1 instead
        if not isinstance(self, PLSRegression):
            warnings.warn(
                "Attribute `x_scores_` was deprecated in version 0.24 and "
                "will be removed in 1.1 (renaming of 0.26). Use "
                "est.transform(X) on the training data instead.",
                FutureWarning,
            )
        return self._x_scores

    @property
    def y_scores_(self):
        """Attribute `y_scores_` was deprecated in version 0.24."""
        # TODO: raise error in 1.1 instead
        if not isinstance(self, PLSRegression):
            warnings.warn(
                "Attribute `y_scores_` was deprecated in version 0.24 and "
                "will be removed in 1.1 (renaming of 0.26). Use "
                "est.transform(X) on the training data instead.",
                FutureWarning,
            )
        return self._y_scores

    def _more_tags(self):
        return {"poor_score": True, "requires_y": False}


class PLSRegression(_PLS):
    """PLS regression.

    PLSRegression is also known as PLS2 or PLS1, depending on the number of
    targets.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, n_features]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    max_iter : int, default=500
        The maximum number of iterations of the power method when
        `algorithm='nipals'`. Ignored otherwise.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in :term:`fit` before applying centering,
        and potentially scaling. If `False`, these operations will be done
        inplace, modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training samples.

    y_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training targets.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_features, n_targets)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_`.

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each
        component.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSRegression
    >>> X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> pls2 = PLSRegression(n_components=2)
    >>> pls2.fit(X, Y)
    PLSRegression()
    >>> Y_pred = pls2.predict(X)
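    >>> # The fitted coefficients map the 3 features to the 2 targets,
    >>> # shape (n_features, n_targets):
    >>> pls2.coef_.shape
    (3, 2)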
642    """

    # This implementation provides the same results as 3 PLS packages
    # in the R language (R-project):
    #     - "mixOmics" with function pls(X, Y, mode = "regression")
    #     - "plspm" with function plsreg2(X, Y)
    #     - "pls" with function oscorespls.fit(X, Y)

    def __init__(
        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            deflation_mode="regression",
            mode="A",
            algorithm="nipals",
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )


class PLSCanonical(_PLS):
    """Partial Least Squares transformer and regressor.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, min(n_samples,
        n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    algorithm : {'nipals', 'svd'}, default='nipals'
        The algorithm used to estimate the first singular vectors of the
        cross-covariance matrix. 'nipals' uses the power method while 'svd'
        will compute the whole SVD.

    max_iter : int, default=500
        The maximum number of iterations of the power method when
        `algorithm='nipals'`. Ignored otherwise.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If False, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training samples.

        .. deprecated:: 0.24
           `x_scores_` is deprecated in 0.24 and will be removed in 1.1
           (renaming of 0.26). You can just call `transform` on the training
           data instead.

    y_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training targets.

        .. deprecated:: 0.24
           `y_scores_` is deprecated in 0.24 and will be removed in 1.1
           (renaming of 0.26). You can just call `transform` on the training
           data instead.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_features, n_targets)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_`.

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each
        component. Empty if `algorithm='svd'`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    CCA : Canonical Correlation Analysis.
    PLSSVD : Partial Least Square SVD.

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSCanonical
    >>> X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> plsca = PLSCanonical(n_components=2)
    >>> plsca.fit(X, Y)
    PLSCanonical()
    >>> X_c, Y_c = plsca.transform(X, Y)
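    >>> # Scores live in the component space, shape (n_samples, n_components):
    >>> X_c.shape, Y_c.shape
    ((4, 2), (4, 2))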
769    """

    # This implementation provides the same results as the "plspm" package
    # in the R language (R-project), using the function plsca(X, Y).
    # Results are equal or collinear with the function
    # ``pls(..., mode = "canonical")`` of the "mixOmics" package. The
    # difference lies in the fact that the mixOmics implementation does not
    # exactly implement the Wold algorithm since it does not normalize
    # y_weights to one.

    def __init__(
        self,
        n_components=2,
        *,
        scale=True,
        algorithm="nipals",
        max_iter=500,
        tol=1e-06,
        copy=True,
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            deflation_mode="canonical",
            mode="A",
            algorithm=algorithm,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )


class CCA(_PLS):
    """Canonical Correlation Analysis, also known as "Mode B" PLS.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, min(n_samples,
        n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    max_iter : int, default=500
        The maximum number of iterations of the power method.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If False, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training samples.

        .. deprecated:: 0.24
           `x_scores_` is deprecated in 0.24 and will be removed in 1.1
           (renaming of 0.26). You can just call `transform` on the training
           data instead.

    y_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training targets.

        .. deprecated:: 0.24
           `y_scores_` is deprecated in 0.24 and will be removed in 1.1
           (renaming of 0.26). You can just call `transform` on the training
           data instead.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_features, n_targets)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_`.

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each
        component.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.
    PLSSVD : Partial Least Square SVD.

    Examples
    --------
    >>> from sklearn.cross_decomposition import CCA
    >>> X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> cca = CCA(n_components=1)
    >>> cca.fit(X, Y)
    CCA(n_components=1)
    >>> X_c, Y_c = cca.transform(X, Y)
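    >>> # With n_components=1, both score arrays are single columns:
    >>> X_c.shape, Y_c.shape
    ((4, 1), (4, 1))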
897    """

    def __init__(
        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            deflation_mode="canonical",
            mode="B",
            algorithm="nipals",
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )


class PLSSVD(TransformerMixin, BaseEstimator):
    """Partial Least Square SVD.

    This transformer simply performs an SVD on the cross-covariance matrix
    `X'Y`. It is able to project both the training data `X` and the targets
    `Y`. The training data `X` is projected on the left singular vectors, while
    the targets are projected on the right singular vectors.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        The number of components to keep. Should be in `[1,
        min(n_samples, n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If `False`, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the SVD of the cross-covariance matrix.
        Used to project `X` in :meth:`transform`.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the SVD of the cross-covariance matrix.
        Used to project `Y` in :meth:`transform`.

    x_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training samples.

        .. deprecated:: 0.24
           `x_scores_` is deprecated in 0.24 and will be removed in 1.1
           (renaming of 0.26). You can just call `transform` on the training
           data instead.

    y_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training targets.

        .. deprecated:: 0.24
           `y_scores_` is deprecated in 0.24 and will be removed in 1.1
           (renaming of 0.26). You can just call `transform` on the training
           data instead.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.
    CCA : Canonical Correlation Analysis.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.cross_decomposition import PLSSVD
    >>> X = np.array([[0., 0., 1.],
    ...               [1., 0., 0.],
    ...               [2., 2., 2.],
    ...               [2., 5., 4.]])
    >>> Y = np.array([[0.1, -0.2],
    ...               [0.9, 1.1],
    ...               [6.2, 5.9],
    ...               [11.9, 12.3]])
    >>> pls = PLSSVD(n_components=2).fit(X, Y)
    >>> X_c, Y_c = pls.transform(X, Y)
    >>> X_c.shape, Y_c.shape
    ((4, 2), (4, 2))
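    >>> # The weights returned by the SVD have orthonormal columns:
    >>> bool(np.allclose(pls.x_weights_.T @ pls.x_weights_, np.eye(2)))
    True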
996    """

    def __init__(self, n_components=2, *, scale=True, copy=True):
        self.n_components = n_components
        self.scale = scale
        self.copy = copy

    def fit(self, X, Y):
        """Fit model to data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Targets.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        check_consistent_length(X, Y)
        X = self._validate_data(
            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
        )
        Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)

        # We'll compute the SVD of the cross-covariance matrix = X.T.dot(Y).
        # The rank of this matrix is at most min(n_samples, n_features,
        # n_targets), so n_components cannot be bigger than that.
        n_components = self.n_components
        rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])
        if not 1 <= n_components <= rank_upper_bound:
            # TODO: raise an error in 1.1
            warnings.warn(
                f"As of version 0.24, n_components({n_components}) should be "
                "in [1, min(n_features, n_samples, n_targets)] = "
                f"[1, {rank_upper_bound}]. "
                f"n_components={rank_upper_bound} will be used instead. "
                "In version 1.1 (renaming of 0.26), an error will be raised.",
                FutureWarning,
            )
            n_components = rank_upper_bound

        X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
            X, Y, self.scale
        )

        # Compute SVD of cross-covariance matrix
        C = np.dot(X.T, Y)
        U, s, Vt = svd(C, full_matrices=False)
        U = U[:, :n_components]
        Vt = Vt[:n_components]
        U, Vt = svd_flip(U, Vt)
        V = Vt.T

        self._x_scores = np.dot(X, U)  # TODO: remove in 1.1
        self._y_scores = np.dot(Y, V)  # TODO: remove in 1.1
        self.x_weights_ = U
        self.y_weights_ = V
        return self

    # mypy error: Decorated property not supported
    @deprecated(  # type: ignore
        "Attribute `x_scores_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26). Use est.transform(X) on "
        "the training data instead."
    )
    @property
    def x_scores_(self):
        return self._x_scores

    # mypy error: Decorated property not supported
    @deprecated(  # type: ignore
        "Attribute `y_scores_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26). Use est.transform(X, Y) "
        "on the training data instead."
    )
    @property
    def y_scores_(self):
        return self._y_scores

    @deprecated(  # type: ignore
        "Attribute `x_mean_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def x_mean_(self):
        return self._x_mean

    @deprecated(  # type: ignore
        "Attribute `y_mean_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def y_mean_(self):
        return self._y_mean

    @deprecated(  # type: ignore
        "Attribute `x_std_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def x_std_(self):
        return self._x_std

    @deprecated(  # type: ignore
        "Attribute `y_std_` was deprecated in version 0.24 and "
        "will be removed in 1.1 (renaming of 0.26)."
    )
    @property
    def y_std_(self):
        return self._y_std

    def transform(self, X, Y=None):
        """
        Apply the dimensionality reduction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to be transformed.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets), \
                default=None
            Targets.

        Returns
        -------
        x_scores : array-like or tuple of array-like
            The transformed data `X_transformed` if `Y` is not given,
            `(X_transformed, Y_transformed)` otherwise.
        """
        check_is_fitted(self)
        X = self._validate_data(X, dtype=np.float64, reset=False)
        Xr = (X - self._x_mean) / self._x_std
        x_scores = np.dot(Xr, self.x_weights_)
        if Y is not None:
            Y = check_array(Y, ensure_2d=False, dtype=np.float64)
            if Y.ndim == 1:
                Y = Y.reshape(-1, 1)
            Yr = (Y - self._y_mean) / self._y_std
            y_scores = np.dot(Yr, self.y_weights_)
            return x_scores, y_scores
        return x_scores

    def fit_transform(self, X, y=None):
        """Learn and apply the dimensionality reduction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.

        y : array-like of shape (n_samples,) or (n_samples, n_targets), \
                default=None
            Targets.

        Returns
        -------
        out : array-like or tuple of array-like
            The transformed data `X_transformed` if `y` is not given,
            `(X_transformed, Y_transformed)` otherwise.
        """
        return self.fit(X, y).transform(X, y)