import numpy as np

from ..base import BaseEstimator, ClassifierMixin
from .validation import _num_samples, check_array, check_is_fitted


class ArraySlicingWrapper:
    """Indexing helper used to emulate ``DataFrame.iloc`` on a mock frame.

    Parameters
    ----------
    array : ndarray
        The array to index. The result of every indexing operation is wrapped
        in a :class:`MockDataFrame`, so it must expose ``shape`` and ``ndim``.
    """

    def __init__(self, array):
        self.array = array

    def __getitem__(self, aslice):
        # Delegate the indexing to the wrapped array and re-wrap the result.
        return MockDataFrame(self.array[aslice])


class MockDataFrame:
    """Minimal stand-in for a dataframe, backed by an array.

    Instances have a shape and a length but deliberately do not support
    direct indexing (there is no ``__getitem__``); positional access goes
    through ``iloc`` or ``take``.

    Parameters
    ----------
    array : ndarray
        The underlying data. It is stored as both ``array`` and ``values``.
    """

    def __init__(self, array):
        self.array = array
        self.values = array
        self.shape = array.shape
        self.ndim = array.ndim
        # ugly hack to make iloc work.
        self.iloc = ArraySlicingWrapper(array)

    def __len__(self):
        return len(self.array)

    def __array__(self, dtype=None):
        # Pandas data frames also are array-like: we want to make sure that
        # input validation in cross-validation does not try to call that
        # method.
        # NOTE: `dtype` is accepted for protocol compatibility but ignored.
        return self.array

    def __eq__(self, other):
        """Element-wise equality, returned as a new ``MockDataFrame``."""
        return MockDataFrame(self.array == other.array)

    def __ne__(self, other):
        """Element-wise inequality, returned as a new ``MockDataFrame``.

        This mirrors ``__eq__``. Negating the result of ``__eq__`` with
        ``not`` would only test the length of the frame (through
        ``__len__``) and therefore say nothing about the content.
        """
        return MockDataFrame(self.array != other.array)

    def take(self, indices, axis=0):
        """Return the entries at `indices` along `axis` as a new mock frame."""
        return MockDataFrame(self.array.take(indices, axis=axis))


def _run_check(check, data, params, name):
    """Apply the user callable `check` to `data` and interpret its result.

    Parameters
    ----------
    check : callable
        Called as ``check(data, **params)``.
    data : object
        The `X` or `y` being checked.
    params : dict or None
        Extra keyword arguments for `check`; `None` means no extra argument.
    name : str
        Either ``"X"`` or ``"y"``; only used in the error message.

    Returns
    -------
    data : object
        `data` itself when `check` returned a truthy boolean, otherwise the
        (non-boolean) value returned by `check`.

    Raises
    ------
    AssertionError
        If `check` returned a falsy boolean.
    """
    result = check(data, **({} if params is None else params))
    if isinstance(result, (bool, np.bool_)):
        # Raise explicitly instead of using `assert`: assert statements are
        # removed when Python runs with -O, which would disable the check.
        if not result:
            raise AssertionError(f"`check_{name}` returned False.")
        return data
    return result


class CheckingClassifier(ClassifierMixin, BaseEstimator):
    """Dummy classifier to test pipelining and meta-estimators.

    Checks some property of `X` and `y` in fit / predict.
    This allows testing whether pipelines / cross-validation or metaestimators
    changed the input.

    Can also be used to check if `fit_params` are passed correctly, and
    to force a certain score to be returned.

    Parameters
    ----------
    check_y, check_X : callable, default=None
        The callable used to validate `X` and `y`. These callable should return
        a bool where `False` will trigger an `AssertionError`. Any non-boolean
        return value replaces the checked `X` or `y`.

    check_y_params, check_X_params : dict, default=None
        The optional parameters to pass to `check_X` and `check_y`.

    methods_to_check : "all" or list of str, default="all"
        The methods in which the checks should be applied. By default,
        all checks will be done on all methods (`fit`, `predict`,
        `predict_proba`, `decision_function` and `score`).

    foo_param : int, default=0
        A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1
        otherwise it is 0.

    expected_fit_params : list of str, default=None
        A list of the expected parameters given when calling `fit`.

    Attributes
    ----------
    classes_ : ndarray
        The unique classes seen during `fit`.

    n_features_in_ : int
        The number of features seen during `fit`.

    Examples
    --------
    >>> from sklearn.utils._mocking import CheckingClassifier

    This helper allows asserting specific properties of `X` or `y`. In this
    case we expect `check_X` or `check_y` to return a boolean.

    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))
    >>> clf.fit(X, y)
    CheckingClassifier(...)

    We can also provide a check which might raise an error. In this case, we
    expect `check_X` to return `X` and `check_y` to return `y`.

    >>> from sklearn.utils import check_array
    >>> clf = CheckingClassifier(check_X=check_array)
    >>> clf.fit(X, y)
    CheckingClassifier(...)
    """

    def __init__(
        self,
        *,
        check_y=None,
        check_y_params=None,
        check_X=None,
        check_X_params=None,
        methods_to_check="all",
        foo_param=0,
        expected_fit_params=None,
    ):
        self.check_y = check_y
        self.check_y_params = check_y_params
        self.check_X = check_X
        self.check_X_params = check_X_params
        self.methods_to_check = methods_to_check
        self.foo_param = foo_param
        self.expected_fit_params = expected_fit_params

    def _should_check(self, method):
        """Return whether the checks are enabled for the method `method`."""
        return self.methods_to_check == "all" or method in self.methods_to_check

    def _check_X_y(self, X, y=None, should_be_fitted=True):
        """Validate X and y and make extra check.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data set.
        y : array-like of shape (n_samples), default=None
            The corresponding target, by default None.
        should_be_fitted : bool, default=True
            Whether or not the classifier should be already fitted.
            By default True.

        Returns
        -------
        X, y
            The inputs, each replaced by the output of its check when that
            check returned something other than a boolean.

        Raises
        ------
        AssertionError
            If `check_X` or `check_y` returned `False`.
        """
        if should_be_fitted:
            check_is_fitted(self)
        if self.check_X is not None:
            X = _run_check(self.check_X, X, self.check_X_params, "X")
        # `check_y` is skipped when no target is given (e.g. in `predict`).
        if y is not None and self.check_y is not None:
            y = _run_check(self.check_y, y, self.check_y_params, "y")
        return X, y

    def fit(self, X, y, **fit_params):
        """Fit classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_outputs) or (n_samples,)
            Target relative to X for classification or regression.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            If `X` and `y` have a different number of samples, if a check
            fails, if an expected fit parameter is missing or if a fit
            parameter does not have as many samples as `X`.
        """
        n_samples_X, n_samples_y = _num_samples(X), _num_samples(y)
        if n_samples_X != n_samples_y:
            # Explicit raise rather than `assert` so that the validation is
            # not stripped when Python runs with -O.
            raise AssertionError(
                f"X has {n_samples_X} samples but y has {n_samples_y} samples."
            )
        if self._should_check("fit"):
            X, y = self._check_X_y(X, y, should_be_fitted=False)
        self.n_features_in_ = np.shape(X)[1]
        self.classes_ = np.unique(check_array(y, ensure_2d=False, allow_nd=True))
        # The fit parameters are only inspected when some are expected.
        if self.expected_fit_params:
            missing = set(self.expected_fit_params) - set(fit_params)
            if missing:
                raise AssertionError(
                    f"Expected fit parameter(s) {list(missing)} not seen."
                )
            # Measured on the checked X, which `check_X` may have replaced.
            expected_length = _num_samples(X)
            for key, value in fit_params.items():
                length = _num_samples(value)
                if length != expected_length:
                    raise AssertionError(
                        f"Fit parameter {key} has length {length}"
                        f"; expected {expected_length}."
                    )

        return self

    def predict(self, X):
        """Predict the first class seen in `classes_`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        preds : ndarray of shape (n_samples,)
            Predictions of the first class seen in `classes_`.
        """
        if self._should_check("predict"):
            X, _ = self._check_X_y(X)
        # Index 0 repeated n_samples times selects the first class everywhere.
        return self.classes_[np.zeros(_num_samples(X), dtype=int)]

    def predict_proba(self, X):
        """Predict probabilities for each class.

        Here, the dummy classifier will provide a probability of 1 for the
        first class of `classes_` and 0 otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        proba : ndarray of shape (n_samples, n_classes)
            The probabilities for each sample and class.
        """
        if self._should_check("predict_proba"):
            X, _ = self._check_X_y(X)
        proba = np.zeros((_num_samples(X), len(self.classes_)))
        proba[:, 0] = 1
        return proba

    def decision_function(self, X):
        """Confidence score.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        decision : ndarray of shape (n_samples,) if n_classes == 2\
                else (n_samples, n_classes)
            Confidence score.
        """
        if self._should_check("decision_function"):
            X, _ = self._check_X_y(X)
        n_samples = _num_samples(X)
        if len(self.classes_) == 2:
            # for binary classifier, the confidence score is related to
            # classes_[1] and therefore should be null.
            return np.zeros(n_samples)
        decision = np.zeros((n_samples, len(self.classes_)))
        decision[:, 0] = 1
        return decision

    def score(self, X=None, Y=None):
        """Fake score.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), default=None
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Y : array-like of shape (n_samples, n_output) or (n_samples,), \
                default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        Returns
        -------
        score : float
            Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>
            score=1` otherwise `score=0`).
        """
        if self._should_check("score"):
            self._check_X_y(X, Y)
        return 1.0 if self.foo_param > 1 else 0.0

    def _more_tags(self):
        return {"_skip_test": True, "X_types": ["1dlabel"]}


class NoSampleWeightWrapper(BaseEstimator):
    """Wrap estimator which will not expose `sample_weight`.

    The wrapper forwards `fit`, `predict` and `predict_proba` to the wrapped
    estimator; its own `fit` signature only accepts `X` and `y`.

    Parameters
    ----------
    est : estimator, default=None
        The estimator to wrap.
    """

    def __init__(self, est=None):
        self.est = est

    def fit(self, X, y):
        """Fit the wrapped estimator and return what its `fit` returns."""
        return self.est.fit(X, y)

    def predict(self, X):
        """Return the predictions of the wrapped estimator for `X`."""
        return self.est.predict(X)

    def predict_proba(self, X):
        """Return the class probabilities of the wrapped estimator for `X`."""
        return self.est.predict_proba(X)

    def _more_tags(self):
        return {"_skip_test": True}