1""" 2Generalized Linear Models. 3""" 4 5# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr> 6# Fabian Pedregosa <fabian.pedregosa@inria.fr> 7# Olivier Grisel <olivier.grisel@ensta.org> 8# Vincent Michel <vincent.michel@inria.fr> 9# Peter Prettenhofer <peter.prettenhofer@gmail.com> 10# Mathieu Blondel <mathieu@mblondel.org> 11# Lars Buitinck 12# Maryan Morel <maryan.morel@polytechnique.edu> 13# Giorgio Patrini <giorgio.patrini@anu.edu.au> 14# Maria Telenczuk <https://github.com/maikia> 15# License: BSD 3 clause 16 17from abc import ABCMeta, abstractmethod 18import numbers 19import warnings 20 21import numpy as np 22import scipy.sparse as sp 23from scipy import linalg 24from scipy import optimize 25from scipy import sparse 26from scipy.special import expit 27from joblib import Parallel 28 29from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin 30from ..preprocessing._data import _is_constant_feature 31from ..utils import check_array 32from ..utils.validation import FLOAT_DTYPES 33from ..utils import check_random_state 34from ..utils.extmath import safe_sparse_dot 35from ..utils.extmath import _incremental_mean_and_var 36from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale 37from ..utils.fixes import sparse_lsqr 38from ..utils._seq_dataset import ArrayDataset32, CSRDataset32 39from ..utils._seq_dataset import ArrayDataset64, CSRDataset64 40from ..utils.validation import check_is_fitted, _check_sample_weight 41from ..utils.fixes import delayed 42 43# TODO: bayesian_ridge_regression and bayesian_regression_ard 44# should be squashed into its respective objects. 45 46SPARSE_INTERCEPT_DECAY = 0.01 47# For sparse data intercept updates are scaled by this decay factor to avoid 48# intercept oscillation. 49 50 51# FIXME in 1.2: parameter 'normalize' should be removed from linear models 52# in cases where now normalize=False. The default value of 'normalize' should 53# be changed to False in linear models where now normalize=True 54def _deprecate_normalize(normalize, default, estimator_name): 55 """Normalize is to be deprecated from linear models and a use of 56 a pipeline with a StandardScaler is to be recommended instead. 57 Here the appropriate message is selected to be displayed to the user 58 depending on the default normalize value (as it varies between the linear 59 models and normalize value selected by the user). 60 61 Parameters 62 ---------- 63 normalize : bool, 64 normalize value passed by the user 65 66 default : bool, 67 default normalize value used by the estimator 68 69 estimator_name : str 70 name of the linear estimator which calls this function. 71 The name will be used for writing the deprecation warnings 72 73 Returns 74 ------- 75 normalize : bool, 76 normalize value which should further be used by the estimator at this 77 stage of the depreciation process 78 79 Notes 80 ----- 81 This function should be updated in 1.2 depending on the value of 82 `normalize`: 83 - True, warning: `normalize` was deprecated in 1.2 and will be removed in 84 1.4. Suggest to use pipeline instead. 85 - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4. 86 Leave normalize to its default value. 87 - `deprecated` - this should only be possible with default == False as from 88 1.2 `normalize` in all the linear models should be either removed or the 89 default should be set to False. 90 This function should be completely removed in 1.4. 91 """ 92 93 if normalize not in [True, False, "deprecated"]: 94 raise ValueError( 95 "Leave 'normalize' to its default value or set it to True or False" 96 ) 97 98 if normalize == "deprecated": 99 _normalize = default 100 else: 101 _normalize = normalize 102 103 pipeline_msg = ( 104 "If you wish to scale the data, use Pipeline with a StandardScaler " 105 "in a preprocessing stage. To reproduce the previous behavior:\n\n" 106 "from sklearn.pipeline import make_pipeline\n\n" 107 "model = make_pipeline(StandardScaler(with_mean=False), " 108 f"{estimator_name}())\n\n" 109 "If you wish to pass a sample_weight parameter, you need to pass it " 110 "as a fit parameter to each step of the pipeline as follows:\n\n" 111 "kwargs = {s[0] + '__sample_weight': sample_weight for s " 112 "in model.steps}\n" 113 "model.fit(X, y, **kwargs)\n\n" 114 ) 115 116 if estimator_name == "Ridge" or estimator_name == "RidgeClassifier": 117 alpha_msg = "Set parameter alpha to: original_alpha * n_samples. " 118 elif "Lasso" in estimator_name: 119 alpha_msg = "Set parameter alpha to: original_alpha * np.sqrt(n_samples). " 120 elif "ElasticNet" in estimator_name: 121 alpha_msg = ( 122 "Set parameter alpha to original_alpha * np.sqrt(n_samples) if " 123 "l1_ratio is 1, and to original_alpha * n_samples if l1_ratio is " 124 "0. For other values of l1_ratio, no analytic formula is " 125 "available." 126 ) 127 elif estimator_name == "RidgeCV" or estimator_name == "RidgeClassifierCV": 128 alpha_msg = "Set parameter alphas to: original_alphas * n_samples. " 129 else: 130 alpha_msg = "" 131 132 if default and normalize == "deprecated": 133 warnings.warn( 134 "The default of 'normalize' will be set to False in version 1.2 " 135 "and deprecated in version 1.4.\n" 136 + pipeline_msg 137 + alpha_msg, 138 FutureWarning, 139 ) 140 elif normalize != "deprecated" and normalize and not default: 141 warnings.warn( 142 "'normalize' was deprecated in version 1.0 and will be removed in 1.2.\n" 143 + pipeline_msg 144 + alpha_msg, 145 FutureWarning, 146 ) 147 elif not normalize and not default: 148 warnings.warn( 149 "'normalize' was deprecated in version 1.0 and will be " 150 "removed in 1.2. " 151 "Please leave the normalize parameter to its default value to " 152 "silence this warning. The default behavior of this estimator " 153 "is to not do any normalization. If normalization is needed " 154 "please use sklearn.preprocessing.StandardScaler instead.", 155 FutureWarning, 156 ) 157 158 return _normalize 159 160 161def make_dataset(X, y, sample_weight, random_state=None): 162 """Create ``Dataset`` abstraction for sparse and dense inputs. 163 164 This also returns the ``intercept_decay`` which is different 165 for sparse datasets. 166 167 Parameters 168 ---------- 169 X : array-like, shape (n_samples, n_features) 170 Training data 171 172 y : array-like, shape (n_samples, ) 173 Target values. 174 175 sample_weight : numpy array of shape (n_samples,) 176 The weight of each sample 177 178 random_state : int, RandomState instance or None (default) 179 Determines random number generation for dataset shuffling and noise. 180 Pass an int for reproducible output across multiple function calls. 181 See :term:`Glossary <random_state>`. 182 183 Returns 184 ------- 185 dataset 186 The ``Dataset`` abstraction 187 intercept_decay 188 The intercept decay 189 """ 190 191 rng = check_random_state(random_state) 192 # seed should never be 0 in SequentialDataset64 193 seed = rng.randint(1, np.iinfo(np.int32).max) 194 195 if X.dtype == np.float32: 196 CSRData = CSRDataset32 197 ArrayData = ArrayDataset32 198 else: 199 CSRData = CSRDataset64 200 ArrayData = ArrayDataset64 201 202 if sp.issparse(X): 203 dataset = CSRData(X.data, X.indptr, X.indices, y, sample_weight, seed=seed) 204 intercept_decay = SPARSE_INTERCEPT_DECAY 205 else: 206 X = np.ascontiguousarray(X) 207 dataset = ArrayData(X, y, sample_weight, seed=seed) 208 intercept_decay = 1.0 209 210 return dataset, intercept_decay 211 212 213def _preprocess_data( 214 X, 215 y, 216 fit_intercept, 217 normalize=False, 218 copy=True, 219 sample_weight=None, 220 return_mean=False, 221 check_input=True, 222): 223 """Center and scale data. 224 225 Centers data to have mean zero along axis 0. If fit_intercept=False or if 226 the X is a sparse matrix, no centering is done, but normalization can still 227 be applied. The function returns the statistics necessary to reconstruct 228 the input data, which are X_offset, y_offset, X_scale, such that the output 229 230 X = (X - X_offset) / X_scale 231 232 X_scale is the L2 norm of X - X_offset. If sample_weight is not None, 233 then the weighted mean of X and y is zero, and not the mean itself. If 234 return_mean=True, the mean, eventually weighted, is returned, independently 235 of whether X was centered (option used for optimization with sparse data in 236 coordinate_descend). 237 238 This is here because nearly all linear models will want their data to be 239 centered. This function also systematically makes y consistent with X.dtype 240 """ 241 if isinstance(sample_weight, numbers.Number): 242 sample_weight = None 243 if sample_weight is not None: 244 sample_weight = np.asarray(sample_weight) 245 246 if check_input: 247 X = check_array(X, copy=copy, accept_sparse=["csr", "csc"], dtype=FLOAT_DTYPES) 248 elif copy: 249 if sp.issparse(X): 250 X = X.copy() 251 else: 252 X = X.copy(order="K") 253 254 y = np.asarray(y, dtype=X.dtype) 255 256 if fit_intercept: 257 if sp.issparse(X): 258 X_offset, X_var = mean_variance_axis(X, axis=0, weights=sample_weight) 259 if not return_mean: 260 X_offset[:] = X.dtype.type(0) 261 else: 262 if normalize: 263 X_offset, X_var, _ = _incremental_mean_and_var( 264 X, 265 last_mean=0.0, 266 last_variance=0.0, 267 last_sample_count=0.0, 268 sample_weight=sample_weight, 269 ) 270 else: 271 X_offset = np.average(X, axis=0, weights=sample_weight) 272 273 X_offset = X_offset.astype(X.dtype, copy=False) 274 X -= X_offset 275 276 if normalize: 277 X_var = X_var.astype(X.dtype, copy=False) 278 # Detect constant features on the computed variance, before taking 279 # the np.sqrt. Otherwise constant features cannot be detected with 280 # sample weights. 281 constant_mask = _is_constant_feature(X_var, X_offset, X.shape[0]) 282 if sample_weight is None: 283 X_var *= X.shape[0] 284 else: 285 X_var *= sample_weight.sum() 286 X_scale = np.sqrt(X_var, out=X_var) 287 X_scale[constant_mask] = 1.0 288 if sp.issparse(X): 289 inplace_column_scale(X, 1.0 / X_scale) 290 else: 291 X /= X_scale 292 else: 293 X_scale = np.ones(X.shape[1], dtype=X.dtype) 294 295 y_offset = np.average(y, axis=0, weights=sample_weight) 296 y = y - y_offset 297 else: 298 X_offset = np.zeros(X.shape[1], dtype=X.dtype) 299 X_scale = np.ones(X.shape[1], dtype=X.dtype) 300 if y.ndim == 1: 301 y_offset = X.dtype.type(0) 302 else: 303 y_offset = np.zeros(y.shape[1], dtype=X.dtype) 304 305 return X, y, X_offset, y_offset, X_scale 306 307 308# TODO: _rescale_data should be factored into _preprocess_data. 309# Currently, the fact that sag implements its own way to deal with 310# sample_weight makes the refactoring tricky. 311 312 313def _rescale_data(X, y, sample_weight): 314 """Rescale data sample-wise by square root of sample_weight. 315 316 For many linear models, this enables easy support for sample_weight. 317 318 Returns 319 ------- 320 X_rescaled : {array-like, sparse matrix} 321 322 y_rescaled : {array-like, sparse matrix} 323 """ 324 n_samples = X.shape[0] 325 sample_weight = np.asarray(sample_weight) 326 if sample_weight.ndim == 0: 327 sample_weight = np.full(n_samples, sample_weight, dtype=sample_weight.dtype) 328 sample_weight = np.sqrt(sample_weight) 329 sw_matrix = sparse.dia_matrix((sample_weight, 0), shape=(n_samples, n_samples)) 330 X = safe_sparse_dot(sw_matrix, X) 331 y = safe_sparse_dot(sw_matrix, y) 332 return X, y 333 334 335class LinearModel(BaseEstimator, metaclass=ABCMeta): 336 """Base class for Linear Models""" 337 338 @abstractmethod 339 def fit(self, X, y): 340 """Fit model.""" 341 342 def _decision_function(self, X): 343 check_is_fitted(self) 344 345 X = self._validate_data(X, accept_sparse=["csr", "csc", "coo"], reset=False) 346 return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_ 347 348 def predict(self, X): 349 """ 350 Predict using the linear model. 351 352 Parameters 353 ---------- 354 X : array-like or sparse matrix, shape (n_samples, n_features) 355 Samples. 356 357 Returns 358 ------- 359 C : array, shape (n_samples,) 360 Returns predicted values. 361 """ 362 return self._decision_function(X) 363 364 _preprocess_data = staticmethod(_preprocess_data) 365 366 def _set_intercept(self, X_offset, y_offset, X_scale): 367 """Set the intercept_""" 368 if self.fit_intercept: 369 self.coef_ = self.coef_ / X_scale 370 self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T) 371 else: 372 self.intercept_ = 0.0 373 374 def _more_tags(self): 375 return {"requires_y": True} 376 377 378# XXX Should this derive from LinearModel? It should be a mixin, not an ABC. 379# Maybe the n_features checking can be moved to LinearModel. 380class LinearClassifierMixin(ClassifierMixin): 381 """Mixin for linear classifiers. 382 383 Handles prediction for sparse and dense X. 384 """ 385 386 def decision_function(self, X): 387 """ 388 Predict confidence scores for samples. 389 390 The confidence score for a sample is proportional to the signed 391 distance of that sample to the hyperplane. 392 393 Parameters 394 ---------- 395 X : {array-like, sparse matrix} of shape (n_samples, n_features) 396 The data matrix for which we want to get the confidence scores. 397 398 Returns 399 ------- 400 scores : ndarray of shape (n_samples,) or (n_samples, n_classes) 401 Confidence scores per `(n_samples, n_classes)` combination. In the 402 binary case, confidence score for `self.classes_[1]` where >0 means 403 this class would be predicted. 404 """ 405 check_is_fitted(self) 406 407 X = self._validate_data(X, accept_sparse="csr", reset=False) 408 scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_ 409 return scores.ravel() if scores.shape[1] == 1 else scores 410 411 def predict(self, X): 412 """ 413 Predict class labels for samples in X. 414 415 Parameters 416 ---------- 417 X : {array-like, sparse matrix} of shape (n_samples, n_features) 418 The data matrix for which we want to get the predictions. 419 420 Returns 421 ------- 422 y_pred : ndarray of shape (n_samples,) 423 Vector containing the class labels for each sample. 424 """ 425 scores = self.decision_function(X) 426 if len(scores.shape) == 1: 427 indices = (scores > 0).astype(int) 428 else: 429 indices = scores.argmax(axis=1) 430 return self.classes_[indices] 431 432 def _predict_proba_lr(self, X): 433 """Probability estimation for OvR logistic regression. 434 435 Positive class probabilities are computed as 436 1. / (1. + np.exp(-self.decision_function(X))); 437 multiclass is handled by normalizing that over all classes. 438 """ 439 prob = self.decision_function(X) 440 expit(prob, out=prob) 441 if prob.ndim == 1: 442 return np.vstack([1 - prob, prob]).T 443 else: 444 # OvR normalization, like LibLinear's predict_probability 445 prob /= prob.sum(axis=1).reshape((prob.shape[0], -1)) 446 return prob 447 448 449class SparseCoefMixin: 450 """Mixin for converting coef_ to and from CSR format. 451 452 L1-regularizing estimators should inherit this. 453 """ 454 455 def densify(self): 456 """ 457 Convert coefficient matrix to dense array format. 458 459 Converts the ``coef_`` member (back) to a numpy.ndarray. This is the 460 default format of ``coef_`` and is required for fitting, so calling 461 this method is only required on models that have previously been 462 sparsified; otherwise, it is a no-op. 463 464 Returns 465 ------- 466 self 467 Fitted estimator. 468 """ 469 msg = "Estimator, %(name)s, must be fitted before densifying." 470 check_is_fitted(self, msg=msg) 471 if sp.issparse(self.coef_): 472 self.coef_ = self.coef_.toarray() 473 return self 474 475 def sparsify(self): 476 """ 477 Convert coefficient matrix to sparse format. 478 479 Converts the ``coef_`` member to a scipy.sparse matrix, which for 480 L1-regularized models can be much more memory- and storage-efficient 481 than the usual numpy.ndarray representation. 482 483 The ``intercept_`` member is not converted. 484 485 Returns 486 ------- 487 self 488 Fitted estimator. 489 490 Notes 491 ----- 492 For non-sparse models, i.e. when there are not many zeros in ``coef_``, 493 this may actually *increase* memory usage, so use this method with 494 care. A rule of thumb is that the number of zero elements, which can 495 be computed with ``(coef_ == 0).sum()``, must be more than 50% for this 496 to provide significant benefits. 497 498 After calling this method, further fitting with the partial_fit 499 method (if any) will not work until you call densify. 500 """ 501 msg = "Estimator, %(name)s, must be fitted before sparsifying." 502 check_is_fitted(self, msg=msg) 503 self.coef_ = sp.csr_matrix(self.coef_) 504 return self 505 506 507class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): 508 """ 509 Ordinary least squares Linear Regression. 510 511 LinearRegression fits a linear model with coefficients w = (w1, ..., wp) 512 to minimize the residual sum of squares between the observed targets in 513 the dataset, and the targets predicted by the linear approximation. 514 515 Parameters 516 ---------- 517 fit_intercept : bool, default=True 518 Whether to calculate the intercept for this model. If set 519 to False, no intercept will be used in calculations 520 (i.e. data is expected to be centered). 521 522 normalize : bool, default=False 523 This parameter is ignored when ``fit_intercept`` is set to False. 524 If True, the regressors X will be normalized before regression by 525 subtracting the mean and dividing by the l2-norm. 526 If you wish to standardize, please use 527 :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` 528 on an estimator with ``normalize=False``. 529 530 .. deprecated:: 1.0 531 `normalize` was deprecated in version 1.0 and will be 532 removed in 1.2. 533 534 copy_X : bool, default=True 535 If True, X will be copied; else, it may be overwritten. 536 537 n_jobs : int, default=None 538 The number of jobs to use for the computation. This will only provide 539 speedup in case of sufficiently large problems, that is if firstly 540 `n_targets > 1` and secondly `X` is sparse or if `positive` is set 541 to `True`. ``None`` means 1 unless in a 542 :obj:`joblib.parallel_backend` context. ``-1`` means using all 543 processors. See :term:`Glossary <n_jobs>` for more details. 544 545 positive : bool, default=False 546 When set to ``True``, forces the coefficients to be positive. This 547 option is only supported for dense arrays. 548 549 .. versionadded:: 0.24 550 551 Attributes 552 ---------- 553 coef_ : array of shape (n_features, ) or (n_targets, n_features) 554 Estimated coefficients for the linear regression problem. 555 If multiple targets are passed during the fit (y 2D), this 556 is a 2D array of shape (n_targets, n_features), while if only 557 one target is passed, this is a 1D array of length n_features. 558 559 rank_ : int 560 Rank of matrix `X`. Only available when `X` is dense. 561 562 singular_ : array of shape (min(X, y),) 563 Singular values of `X`. Only available when `X` is dense. 564 565 intercept_ : float or array of shape (n_targets,) 566 Independent term in the linear model. Set to 0.0 if 567 `fit_intercept = False`. 568 569 n_features_in_ : int 570 Number of features seen during :term:`fit`. 571 572 .. versionadded:: 0.24 573 574 feature_names_in_ : ndarray of shape (`n_features_in_`,) 575 Names of features seen during :term:`fit`. Defined only when `X` 576 has feature names that are all strings. 577 578 .. versionadded:: 1.0 579 580 See Also 581 -------- 582 Ridge : Ridge regression addresses some of the 583 problems of Ordinary Least Squares by imposing a penalty on the 584 size of the coefficients with l2 regularization. 585 Lasso : The Lasso is a linear model that estimates 586 sparse coefficients with l1 regularization. 587 ElasticNet : Elastic-Net is a linear regression 588 model trained with both l1 and l2 -norm regularization of the 589 coefficients. 590 591 Notes 592 ----- 593 From the implementation point of view, this is just plain Ordinary 594 Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares 595 (scipy.optimize.nnls) wrapped as a predictor object. 596 597 Examples 598 -------- 599 >>> import numpy as np 600 >>> from sklearn.linear_model import LinearRegression 601 >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]]) 602 >>> # y = 1 * x_0 + 2 * x_1 + 3 603 >>> y = np.dot(X, np.array([1, 2])) + 3 604 >>> reg = LinearRegression().fit(X, y) 605 >>> reg.score(X, y) 606 1.0 607 >>> reg.coef_ 608 array([1., 2.]) 609 >>> reg.intercept_ 610 3.0... 611 >>> reg.predict(np.array([[3, 5]])) 612 array([16.]) 613 """ 614 615 def __init__( 616 self, 617 *, 618 fit_intercept=True, 619 normalize="deprecated", 620 copy_X=True, 621 n_jobs=None, 622 positive=False, 623 ): 624 self.fit_intercept = fit_intercept 625 self.normalize = normalize 626 self.copy_X = copy_X 627 self.n_jobs = n_jobs 628 self.positive = positive 629 630 def fit(self, X, y, sample_weight=None): 631 """ 632 Fit linear model. 633 634 Parameters 635 ---------- 636 X : {array-like, sparse matrix} of shape (n_samples, n_features) 637 Training data. 638 639 y : array-like of shape (n_samples,) or (n_samples, n_targets) 640 Target values. Will be cast to X's dtype if necessary. 641 642 sample_weight : array-like of shape (n_samples,), default=None 643 Individual weights for each sample. 644 645 .. versionadded:: 0.17 646 parameter *sample_weight* support to LinearRegression. 647 648 Returns 649 ------- 650 self : object 651 Fitted Estimator. 652 """ 653 654 _normalize = _deprecate_normalize( 655 self.normalize, default=False, estimator_name=self.__class__.__name__ 656 ) 657 658 n_jobs_ = self.n_jobs 659 660 accept_sparse = False if self.positive else ["csr", "csc", "coo"] 661 662 X, y = self._validate_data( 663 X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True 664 ) 665 666 if sample_weight is not None: 667 sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) 668 669 X, y, X_offset, y_offset, X_scale = self._preprocess_data( 670 X, 671 y, 672 fit_intercept=self.fit_intercept, 673 normalize=_normalize, 674 copy=self.copy_X, 675 sample_weight=sample_weight, 676 return_mean=True, 677 ) 678 679 if sample_weight is not None: 680 # Sample weight can be implemented via a simple rescaling. 681 X, y = _rescale_data(X, y, sample_weight) 682 683 if self.positive: 684 if y.ndim < 2: 685 self.coef_, self._residues = optimize.nnls(X, y) 686 else: 687 # scipy.optimize.nnls cannot handle y with shape (M, K) 688 outs = Parallel(n_jobs=n_jobs_)( 689 delayed(optimize.nnls)(X, y[:, j]) for j in range(y.shape[1]) 690 ) 691 self.coef_, self._residues = map(np.vstack, zip(*outs)) 692 elif sp.issparse(X): 693 X_offset_scale = X_offset / X_scale 694 695 def matvec(b): 696 return X.dot(b) - b.dot(X_offset_scale) 697 698 def rmatvec(b): 699 return X.T.dot(b) - X_offset_scale * np.sum(b) 700 701 X_centered = sparse.linalg.LinearOperator( 702 shape=X.shape, matvec=matvec, rmatvec=rmatvec 703 ) 704 705 if y.ndim < 2: 706 out = sparse_lsqr(X_centered, y) 707 self.coef_ = out[0] 708 self._residues = out[3] 709 else: 710 # sparse_lstsq cannot handle y with shape (M, K) 711 outs = Parallel(n_jobs=n_jobs_)( 712 delayed(sparse_lsqr)(X_centered, y[:, j].ravel()) 713 for j in range(y.shape[1]) 714 ) 715 self.coef_ = np.vstack([out[0] for out in outs]) 716 self._residues = np.vstack([out[3] for out in outs]) 717 else: 718 self.coef_, self._residues, self.rank_, self.singular_ = linalg.lstsq(X, y) 719 self.coef_ = self.coef_.T 720 721 if y.ndim == 1: 722 self.coef_ = np.ravel(self.coef_) 723 self._set_intercept(X_offset, y_offset, X_scale) 724 return self 725 726 727def _check_precomputed_gram_matrix( 728 X, precompute, X_offset, X_scale, rtol=1e-7, atol=1e-5 729): 730 """Computes a single element of the gram matrix and compares it to 731 the corresponding element of the user supplied gram matrix. 732 733 If the values do not match a ValueError will be thrown. 734 735 Parameters 736 ---------- 737 X : ndarray of shape (n_samples, n_features) 738 Data array. 739 740 precompute : array-like of shape (n_features, n_features) 741 User-supplied gram matrix. 742 743 X_offset : ndarray of shape (n_features,) 744 Array of feature means used to center design matrix. 745 746 X_scale : ndarray of shape (n_features,) 747 Array of feature scale factors used to normalize design matrix. 748 749 rtol : float, default=1e-7 750 Relative tolerance; see numpy.allclose. 751 752 atol : float, default=1e-5 753 absolute tolerance; see :func`numpy.allclose`. Note that the default 754 here is more tolerant than the default for 755 :func:`numpy.testing.assert_allclose`, where `atol=0`. 756 757 Raises 758 ------ 759 ValueError 760 Raised when the provided Gram matrix is not consistent. 761 """ 762 763 n_features = X.shape[1] 764 f1 = n_features // 2 765 f2 = min(f1 + 1, n_features - 1) 766 767 v1 = (X[:, f1] - X_offset[f1]) * X_scale[f1] 768 v2 = (X[:, f2] - X_offset[f2]) * X_scale[f2] 769 770 expected = np.dot(v1, v2) 771 actual = precompute[f1, f2] 772 773 if not np.isclose(expected, actual, rtol=rtol, atol=atol): 774 raise ValueError( 775 "Gram matrix passed in via 'precompute' parameter " 776 "did not pass validation when a single element was " 777 "checked - please check that it was computed " 778 f"properly. For element ({f1},{f2}) we computed " 779 f"{expected} but the user-supplied value was " 780 f"{actual}." 781 ) 782 783 784def _pre_fit( 785 X, 786 y, 787 Xy, 788 precompute, 789 normalize, 790 fit_intercept, 791 copy, 792 check_input=True, 793 sample_weight=None, 794): 795 """Aux function used at beginning of fit in linear models 796 797 Parameters 798 ---------- 799 order : 'F', 'C' or None, default=None 800 Whether X and y will be forced to be fortran or c-style. Only relevant 801 if sample_weight is not None. 802 """ 803 n_samples, n_features = X.shape 804 805 if sparse.isspmatrix(X): 806 # copy is not needed here as X is not modified inplace when X is sparse 807 precompute = False 808 X, y, X_offset, y_offset, X_scale = _preprocess_data( 809 X, 810 y, 811 fit_intercept=fit_intercept, 812 normalize=normalize, 813 copy=False, 814 return_mean=True, 815 check_input=check_input, 816 ) 817 else: 818 # copy was done in fit if necessary 819 X, y, X_offset, y_offset, X_scale = _preprocess_data( 820 X, 821 y, 822 fit_intercept=fit_intercept, 823 normalize=normalize, 824 copy=copy, 825 check_input=check_input, 826 sample_weight=sample_weight, 827 ) 828 if sample_weight is not None: 829 X, y = _rescale_data(X, y, sample_weight=sample_weight) 830 831 # FIXME: 'normalize' to be removed in 1.2 832 if hasattr(precompute, "__array__"): 833 if ( 834 fit_intercept 835 and not np.allclose(X_offset, np.zeros(n_features)) 836 or normalize 837 and not np.allclose(X_scale, np.ones(n_features)) 838 ): 839 warnings.warn( 840 "Gram matrix was provided but X was centered to fit " 841 "intercept, or X was normalized : recomputing Gram matrix.", 842 UserWarning, 843 ) 844 # recompute Gram 845 precompute = "auto" 846 Xy = None 847 elif check_input: 848 # If we're going to use the user's precomputed gram matrix, we 849 # do a quick check to make sure its not totally bogus. 850 _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale) 851 852 # precompute if n_samples > n_features 853 if isinstance(precompute, str) and precompute == "auto": 854 precompute = n_samples > n_features 855 856 if precompute is True: 857 # make sure that the 'precompute' array is contiguous. 858 precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype, order="C") 859 np.dot(X.T, X, out=precompute) 860 861 if not hasattr(precompute, "__array__"): 862 Xy = None # cannot use Xy if precompute is not Gram 863 864 if hasattr(precompute, "__array__") and Xy is None: 865 common_dtype = np.find_common_type([X.dtype, y.dtype], []) 866 if y.ndim == 1: 867 # Xy is 1d, make sure it is contiguous. 868 Xy = np.empty(shape=n_features, dtype=common_dtype, order="C") 869 np.dot(X.T, y, out=Xy) 870 else: 871 # Make sure that Xy is always F contiguous even if X or y are not 872 # contiguous: the goal is to make it fast to extract the data for a 873 # specific target. 874 n_targets = y.shape[1] 875 Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype, order="F") 876 np.dot(y.T, X, out=Xy.T) 877 878 return X, y, X_offset, y_offset, X_scale, precompute, Xy 879