import numpy as np

from ..base import BaseEstimator, ClassifierMixin
from .validation import _num_samples, check_array, check_is_fitted


class ArraySlicingWrapper:
    """
    Parameters
    ----------
    array : ndarray
        The array to wrap. Slicing it returns a `MockDataFrame`.
    """

    def __init__(self, array):
        self.array = array

    def __getitem__(self, aslice):
        return MockDataFrame(self.array[aslice])


class MockDataFrame:
    """
    Parameters
    ----------
    array : ndarray
        The array to wrap with a minimal pandas-DataFrame-like interface.
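
    Examples
    --------
    A minimal sketch of the DataFrame-like surface this mock exposes:

    >>> import numpy as np
    >>> from sklearn.utils._mocking import MockDataFrame
    >>> df = MockDataFrame(np.arange(6).reshape(3, 2))
    >>> df.shape
    (3, 2)
    >>> len(df)
    3
    >>> df.iloc[:2].shape
    (2, 2)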
    """

    # have shape and length but don't support indexing.

    def __init__(self, array):
        self.array = array
        self.values = array
        self.shape = array.shape
        self.ndim = array.ndim
        # ugly hack to make iloc work.
        self.iloc = ArraySlicingWrapper(array)

    def __len__(self):
        return len(self.array)

    def __array__(self, dtype=None):
        # Pandas data frames also are array-like: we want to make sure that
        # input validation in cross-validation does not try to call that
        # method.
        return self.array

    def __eq__(self, other):
        return MockDataFrame(self.array == other.array)

    def __ne__(self, other):
        return not self == other

    def take(self, indices, axis=0):
        return MockDataFrame(self.array.take(indices, axis=axis))


class CheckingClassifier(ClassifierMixin, BaseEstimator):
    """Dummy classifier to test pipelining and meta-estimators.

    Checks some property of `X` and `y` in fit / predict.
    This allows testing whether pipelines / cross-validation or meta-estimators
    changed the input.

    Can also be used to check if `fit_params` are passed correctly, and
    to force a certain score to be returned.

    Parameters
    ----------
    check_y, check_X : callable, default=None
        The callables used to validate `X` and `y`. These callables should
        return a bool where `False` will trigger an `AssertionError`.

    check_y_params, check_X_params : dict, default=None
        The optional parameters to pass to `check_X` and `check_y`.

    methods_to_check : "all" or list of str, default="all"
        The methods in which the checks should be applied. By default,
        all checks will be done on all methods (`fit`, `predict`,
        `predict_proba`, `decision_function` and `score`).

    foo_param : int, default=0
        A `foo` param. When `foo_param > 1`, the output of :meth:`score` will
        be 1, otherwise it is 0.

    expected_fit_params : list of str, default=None
        A list of the expected parameters given when calling `fit`.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The classes seen during `fit`.

    n_features_in_ : int
        The number of features seen during `fit`.

    Examples
    --------
    >>> from sklearn.utils._mocking import CheckingClassifier

    This helper allows us to assert certain properties of `X` or `y`. In this
    case we expect `check_X` or `check_y` to return a boolean.

    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))
    >>> clf.fit(X, y)
    CheckingClassifier(...)

    We can also provide a check which might raise an error. In this case, we
    expect `check_X` to return `X` and `check_y` to return `y`.

    >>> from sklearn.utils import check_array
    >>> clf = CheckingClassifier(check_X=check_array)
    >>> clf.fit(X, y)
    CheckingClassifier(...)
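
    Expected `fit` parameters can also be asserted; for instance (an
    illustrative sketch, reusing the iris data loaded above):

    >>> import numpy as np
    >>> clf = CheckingClassifier(check_X=check_array,
    ...                          expected_fit_params=["sample_weight"])
    >>> clf.fit(X, y, sample_weight=np.ones(len(y)))
    CheckingClassifier(...)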
    """

    def __init__(
        self,
        *,
        check_y=None,
        check_y_params=None,
        check_X=None,
        check_X_params=None,
        methods_to_check="all",
        foo_param=0,
        expected_fit_params=None,
    ):
        self.check_y = check_y
        self.check_y_params = check_y_params
        self.check_X = check_X
        self.check_X_params = check_X_params
        self.methods_to_check = methods_to_check
        self.foo_param = foo_param
        self.expected_fit_params = expected_fit_params

    def _check_X_y(self, X, y=None, should_be_fitted=True):
        """Validate X and y and run the extra checks.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data set.
        y : array-like of shape (n_samples,), default=None
            The corresponding target.
        should_be_fitted : bool, default=True
            Whether or not the classifier should already be fitted.

        Returns
        -------
        X, y
        """
        if should_be_fitted:
            check_is_fitted(self)
        if self.check_X is not None:
            params = {} if self.check_X_params is None else self.check_X_params
            checked_X = self.check_X(X, **params)
            if isinstance(checked_X, (bool, np.bool_)):
                assert checked_X
            else:
                X = checked_X
        if y is not None and self.check_y is not None:
            params = {} if self.check_y_params is None else self.check_y_params
            checked_y = self.check_y(y, **params)
            if isinstance(checked_y, (bool, np.bool_)):
                assert checked_y
            else:
                y = checked_y
        return X, y

    def fit(self, X, y, **fit_params):
        """Fit classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_outputs) or (n_samples,)
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator.

        Returns
        -------
        self
        """
        assert _num_samples(X) == _num_samples(y)
        if self.methods_to_check == "all" or "fit" in self.methods_to_check:
            X, y = self._check_X_y(X, y, should_be_fitted=False)
        self.n_features_in_ = np.shape(X)[1]
        self.classes_ = np.unique(check_array(y, ensure_2d=False, allow_nd=True))
        if self.expected_fit_params:
            missing = set(self.expected_fit_params) - set(fit_params)
            if missing:
                raise AssertionError(
                    f"Expected fit parameter(s) {list(missing)} not seen."
                )
            for key, value in fit_params.items():
                if _num_samples(value) != _num_samples(X):
                    raise AssertionError(
                        f"Fit parameter {key} has length {_num_samples(value)}"
                        f"; expected {_num_samples(X)}."
                    )

        return self

    def predict(self, X):
        """Predict the first class seen in `classes_`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        preds : ndarray of shape (n_samples,)
            Predictions of the first class seen in `classes_`.
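
        Examples
        --------
        A small illustrative check of the behaviour described above:

        >>> import numpy as np
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = np.zeros((4, 2)), np.array([1, 2, 1, 2])
        >>> CheckingClassifier().fit(X, y).predict(X)
        array([1, 1, 1, 1])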
        """
        if self.methods_to_check == "all" or "predict" in self.methods_to_check:
            X, y = self._check_X_y(X)
        return self.classes_[np.zeros(_num_samples(X), dtype=int)]

    def predict_proba(self, X):
        """Predict probabilities for each class.

        Here, the dummy classifier will provide a probability of 1 for the
        first class of `classes_` and 0 otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        proba : ndarray of shape (n_samples, n_classes)
            The probabilities for each sample and class.
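
        Examples
        --------
        A small illustrative sketch:

        >>> import numpy as np
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = np.zeros((3, 2)), np.array([1, 2, 1])
        >>> CheckingClassifier().fit(X, y).predict_proba(X)
        array([[1., 0.],
               [1., 0.],
               [1., 0.]])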
        """
        if self.methods_to_check == "all" or "predict_proba" in self.methods_to_check:
            X, y = self._check_X_y(X)
        proba = np.zeros((_num_samples(X), len(self.classes_)))
        proba[:, 0] = 1
        return proba

    def decision_function(self, X):
        """Confidence score.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        decision : ndarray of shape (n_samples,) if n_classes == 2\
                else (n_samples, n_classes)
            Confidence score.
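
        Examples
        --------
        A small illustrative sketch for the binary case:

        >>> import numpy as np
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = np.zeros((3, 2)), np.array([1, 2, 1])
        >>> CheckingClassifier().fit(X, y).decision_function(X)
        array([0., 0., 0.])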
        """
        if (
            self.methods_to_check == "all"
            or "decision_function" in self.methods_to_check
        ):
            X, y = self._check_X_y(X)
        if len(self.classes_) == 2:
            # for a binary classifier, the confidence score is related to
            # classes_[1] and therefore should be zero.
            return np.zeros(_num_samples(X))
        else:
            decision = np.zeros((_num_samples(X), len(self.classes_)))
            decision[:, 0] = 1
            return decision

    def score(self, X=None, Y=None):
        """Fake score.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Y : array-like of shape (n_samples, n_outputs) or (n_samples,)
            Target relative to X for classification or regression;
            None for unsupervised learning.

        Returns
        -------
        score : float
            Either 0 or 1 depending on `foo_param` (i.e. if `foo_param > 1`,
            `score=1`; otherwise `score=0`).
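
        Examples
        --------
        A small illustrative sketch:

        >>> import numpy as np
        >>> from sklearn.utils._mocking import CheckingClassifier
        >>> X, y = np.zeros((3, 2)), np.array([1, 2, 1])
        >>> CheckingClassifier(foo_param=2).fit(X, y).score(X, y)
        1.0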
        """
        if self.methods_to_check == "all" or "score" in self.methods_to_check:
            self._check_X_y(X, Y)
        if self.foo_param > 1:
            score = 1.0
        else:
            score = 0.0
        return score

    def _more_tags(self):
        return {"_skip_test": True, "X_types": ["1dlabel"]}


class NoSampleWeightWrapper(BaseEstimator):
    """Wrap an estimator so that it does not expose `sample_weight` in `fit`.

    Parameters
    ----------
    est : estimator, default=None
        The estimator to wrap.
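
    Examples
    --------
    A small illustrative sketch (wrapping an arbitrary scikit-learn
    estimator):

    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.utils._mocking import NoSampleWeightWrapper
    >>> from sklearn.utils.validation import has_fit_parameter
    >>> has_fit_parameter(LogisticRegression(), "sample_weight")
    True
    >>> has_fit_parameter(NoSampleWeightWrapper(LogisticRegression()),
    ...                   "sample_weight")
    False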
    """

    def __init__(self, est=None):
        self.est = est

    def fit(self, X, y):
        return self.est.fit(X, y)

    def predict(self, X):
        return self.est.predict(X)

    def predict_proba(self, X):
        return self.est.predict_proba(X)

    def _more_tags(self):
        return {"_skip_test": True}