from time import time
from collections import namedtuple
import warnings

from scipy import stats
import numpy as np

from ..base import clone
from ..exceptions import ConvergenceWarning
from ..preprocessing import normalize
from ..utils import check_array, check_random_state, _safe_indexing, is_scalar_nan
from ..utils.validation import FLOAT_DTYPES, check_is_fitted
from ..utils._mask import _get_mask

from ._base import _BaseImputer
from ._base import SimpleImputer
from ._base import _check_inputs_dtype


_ImputerTriplet = namedtuple(
    "_ImputerTriplet", ["feat_idx", "neighbor_feat_idx", "estimator"]
)


class IterativeImputer(_BaseImputer):
    """Multivariate imputer that estimates each feature from all the others.

    A strategy for imputing missing values by modeling each feature with
    missing values as a function of other features in a round-robin fashion.

    Read more in the :ref:`User Guide <iterative_imputer>`.

    .. versionadded:: 0.21

    .. note::

      This estimator is still **experimental** for now: the predictions
      and the API might change without any deprecation cycle. To use it,
      you need to explicitly import `enable_iterative_imputer`::

        >>> # explicitly require this experimental feature
        >>> from sklearn.experimental import enable_iterative_imputer  # noqa
        >>> # now you can import normally from sklearn.impute
        >>> from sklearn.impute import IterativeImputer

    Parameters
    ----------
    estimator : estimator object, default=BayesianRidge()
        The estimator to use at each step of the round-robin imputation.
        If `sample_posterior=True`, the estimator must support
        `return_std` in its `predict` method.

    missing_values : int or np.nan, default=np.nan
        The placeholder for the missing values. All occurrences of
        `missing_values` will be imputed. For pandas' dataframes with
        nullable integer dtypes with missing values, `missing_values`
        should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.

    sample_posterior : bool, default=False
        Whether to sample from the (Gaussian) predictive posterior of the
        fitted estimator for each imputation. Estimator must support
        `return_std` in its `predict` method if set to `True`. Set to
        `True` if using `IterativeImputer` for multiple imputations.

    max_iter : int, default=10
        Maximum number of imputation rounds to perform before returning the
        imputations computed during the final round. A round is a single
        imputation of each feature with missing values. The stopping criterion
        is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,
        where `X_t` is `X` at iteration `t`. Note that early stopping is only
        applied if `sample_posterior=False`.

    tol : float, default=1e-3
        Tolerance of the stopping condition.

    n_nearest_features : int, default=None
        Number of other features to use to estimate the missing values of
        each feature column. Nearness between features is measured using
        the absolute correlation coefficient between each feature pair (after
        initial imputation). To ensure coverage of features throughout the
        imputation process, the neighbor features are not necessarily nearest,
        but are drawn with probability proportional to correlation for each
        imputed target feature. Can provide significant speed-up when the
        number of features is huge. If `None`, all features will be used.

    initial_strategy : {'mean', 'median', 'most_frequent', 'constant'}, \
            default='mean'
        Which strategy to use to initialize the missing values. Same as the
        `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.

    imputation_order : {'ascending', 'descending', 'roman', 'arabic', \
            'random'}, default='ascending'
        The order in which the features will be imputed. Possible values:

        - `'ascending'`: From features with fewest missing values to most.
        - `'descending'`: From features with most missing values to fewest.
        - `'roman'`: Left to right.
        - `'arabic'`: Right to left.
        - `'random'`: A random order for each round.

    skip_complete : bool, default=False
        If `True` then features with missing values during :meth:`transform`
        which did not have any missing values during :meth:`fit` will be
        imputed with the initial imputation method only. Set to `True` if you
        have many features with no missing values at both :meth:`fit` and
        :meth:`transform` time to save compute.

    min_value : float or array-like of shape (n_features,), default=-np.inf
        Minimum possible imputed value. Broadcast to shape `(n_features,)` if
        scalar. If array-like, expects shape `(n_features,)`, one min value for
        each feature. The default is `-np.inf`.

        .. versionchanged:: 0.23
           Added support for array-like.

    max_value : float or array-like of shape (n_features,), default=np.inf
        Maximum possible imputed value. Broadcast to shape `(n_features,)` if
        scalar. If array-like, expects shape `(n_features,)`, one max value for
        each feature. The default is `np.inf`.

        .. versionchanged:: 0.23
           Added support for array-like.

    verbose : int, default=0
        Verbosity flag, controls the debug messages that are issued
        as functions are evaluated. The higher, the more verbose. Can be 0, 1,
        or 2.

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use. Randomizes
        selection of estimator features if `n_nearest_features` is not `None`,
        the `imputation_order` if `random`, and the sampling from posterior if
        `sample_posterior=True`. Use an integer for determinism.
        See :term:`the Glossary <random_state>`.

    add_indicator : bool, default=False
        If `True`, a :class:`MissingIndicator` transform will stack onto output
        of the imputer's transform. This allows a predictive estimator
        to account for missingness despite imputation. If a feature has no
        missing values at fit/train time, the feature won't appear on
        the missing indicator even if there are missing values at
        transform/test time.

    Attributes
    ----------
    initial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`
        Imputer used to initialize the missing values.

    imputation_sequence_ : list of tuples
        Each tuple has `(feat_idx, neighbor_feat_idx, estimator)`, where
        `feat_idx` is the current feature to be imputed,
        `neighbor_feat_idx` is the array of other features used to impute the
        current feature, and `estimator` is the trained estimator used for
        the imputation. Length is `self.n_features_with_missing_ *
        self.n_iter_`.

    n_iter_ : int
        Number of iteration rounds that occurred. Will be less than
        `self.max_iter` if early stopping criterion was reached.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_features_with_missing_ : int
        Number of features with missing values.

    indicator_ : :class:`~sklearn.impute.MissingIndicator`
        Indicator used to add binary indicators for missing values.
        `None` if `add_indicator=False`.

    random_state_ : RandomState instance
        RandomState instance that is generated either from a seed, the random
        number generator or by `np.random`.

    See Also
    --------
    SimpleImputer : Univariate imputation of missing values.

    Notes
    -----
    To support imputation in inductive mode we store each feature's estimator
    during the :meth:`fit` phase, and predict without refitting (in order)
    during the :meth:`transform` phase.

    Features which contain all missing values at :meth:`fit` are discarded upon
    :meth:`transform`.

    References
    ----------
    .. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice:
        Multivariate Imputation by Chained Equations in R". Journal of
        Statistical Software 45: 1-67.
        <https://www.jstatsoft.org/article/view/v045i03>`_

    .. [2] `S. F. Buck, (1960). "A Method of Estimation of Missing Values in
        Multivariate Data Suitable for use with an Electronic Computer".
        Journal of the Royal Statistical Society 22(2): 302-306.
        <https://www.jstor.org/stable/2984099>`_

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.experimental import enable_iterative_imputer
    >>> from sklearn.impute import IterativeImputer
    >>> imp_mean = IterativeImputer(random_state=0)
    >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])
    IterativeImputer(random_state=0)
    >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]
    >>> imp_mean.transform(X)
    array([[ 6.9584...,  2.       ,  3.        ],
           [ 4.       ,  2.6000...,  6.        ],
           [10.       ,  4.9999...,  9.        ]])
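
    A sketch of multiple imputation: with `sample_posterior=True`, repeated
    calls draw different imputations, because the fitted random state is
    reused and advances between calls:

    >>> imp = IterativeImputer(sample_posterior=True, random_state=0)
    >>> imputations = [imp.fit_transform(X) for _ in range(3)]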
    """

    def __init__(
        self,
        estimator=None,
        *,
        missing_values=np.nan,
        sample_posterior=False,
        max_iter=10,
        tol=1e-3,
        n_nearest_features=None,
        initial_strategy="mean",
        imputation_order="ascending",
        skip_complete=False,
        min_value=-np.inf,
        max_value=np.inf,
        verbose=0,
        random_state=None,
        add_indicator=False,
    ):
        super().__init__(missing_values=missing_values, add_indicator=add_indicator)

        self.estimator = estimator
        self.sample_posterior = sample_posterior
        self.max_iter = max_iter
        self.tol = tol
        self.n_nearest_features = n_nearest_features
        self.initial_strategy = initial_strategy
        self.imputation_order = imputation_order
        self.skip_complete = skip_complete
        self.min_value = min_value
        self.max_value = max_value
        self.verbose = verbose
        self.random_state = random_state

    def _impute_one_feature(
        self,
        X_filled,
        mask_missing_values,
        feat_idx,
        neighbor_feat_idx,
        estimator=None,
        fit_mode=True,
    ):
        """Impute a single feature from the others provided.

        This function predicts the missing values of one of the features using
        the current estimates of all the other features. The `estimator` must
        support `return_std=True` in its `predict` method if
        `self.sample_posterior=True`.

        Parameters
        ----------
        X_filled : ndarray
            Input data with the most recent imputations.

        mask_missing_values : ndarray
            Input data's missing indicator matrix.

        feat_idx : int
            Index of the feature currently being imputed.

        neighbor_feat_idx : ndarray
            Indices of the features to be used in imputing `feat_idx`.

        estimator : object
            The estimator to use at this step of the round-robin imputation.
            If `sample_posterior=True`, the estimator must support
            `return_std` in its `predict` method.
            If None, it will be cloned from self._estimator.

        fit_mode : boolean, default=True
            Whether to fit and predict with the estimator or just predict.

        Returns
        -------
        X_filled : ndarray
            Input data with `X_filled[missing_row_mask, feat_idx]` updated.

        estimator : estimator with sklearn API
            The fitted estimator used to impute
            `X_filled[missing_row_mask, feat_idx]`.
        """
        if estimator is None and fit_mode is False:
            raise ValueError(
                "If fit_mode is False, then an already-fitted "
                "estimator should be passed in."
            )

        if estimator is None:
            estimator = clone(self._estimator)

        missing_row_mask = mask_missing_values[:, feat_idx]
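        # rows where this feature is observed are used for training; rows
        # where it is missing are the prediction targets further below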
        if fit_mode:
            X_train = _safe_indexing(X_filled[:, neighbor_feat_idx], ~missing_row_mask)
            y_train = _safe_indexing(X_filled[:, feat_idx], ~missing_row_mask)
            estimator.fit(X_train, y_train)

        # if no missing values, don't predict
        if np.sum(missing_row_mask) == 0:
            return X_filled, estimator

        # get posterior samples if there is at least one missing value
        X_test = _safe_indexing(X_filled[:, neighbor_feat_idx], missing_row_mask)
        if self.sample_posterior:
            mus, sigmas = estimator.predict(X_test, return_std=True)
            imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)
            # two types of problems: (1) non-positive sigmas
            # (2) mus outside legal range of min_value and max_value
            # (results in inf sample)
            positive_sigmas = sigmas > 0
            imputed_values[~positive_sigmas] = mus[~positive_sigmas]
            mus_too_low = mus < self._min_value[feat_idx]
            imputed_values[mus_too_low] = self._min_value[feat_idx]
            mus_too_high = mus > self._max_value[feat_idx]
            imputed_values[mus_too_high] = self._max_value[feat_idx]
            # the rest can be sampled without statistical issues
            inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high
            mus = mus[inrange_mask]
            sigmas = sigmas[inrange_mask]
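            # stats.truncnorm expects the clipping bounds in standard-deviation
            # units relative to `loc`, hence the (bound - mu) / sigma rescaling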
            a = (self._min_value[feat_idx] - mus) / sigmas
            b = (self._max_value[feat_idx] - mus) / sigmas

            truncated_normal = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas)
            imputed_values[inrange_mask] = truncated_normal.rvs(
                random_state=self.random_state_
            )
        else:
            imputed_values = estimator.predict(X_test)
            imputed_values = np.clip(
                imputed_values, self._min_value[feat_idx], self._max_value[feat_idx]
            )

        # update the feature
        X_filled[missing_row_mask, feat_idx] = imputed_values
        return X_filled, estimator

    def _get_neighbor_feat_idx(self, n_features, feat_idx, abs_corr_mat):
        """Get a list of other features to predict `feat_idx`.

        If `self.n_nearest_features` is not `None` and strictly less than the
        total number of features, then use a probability proportional to the
        absolute correlation between `feat_idx` and each other feature to
        randomly choose a subsample of the other features (without
        replacement). Otherwise, all other features are used.

        Parameters
        ----------
        n_features : int
            Number of features in `X`.

        feat_idx : int
            Index of the feature currently being imputed.

        abs_corr_mat : ndarray, shape (n_features, n_features)
            Absolute correlation matrix of `X`. The diagonal has been zeroed
            out and each feature has been normalized to sum to 1. Can be None.

        Returns
        -------
        neighbor_feat_idx : array-like
            The features to use to impute `feat_idx`.
        """
        if (
            self.n_nearest_features is not None
            and self.n_nearest_features < n_features
        ):
            p = abs_corr_mat[:, feat_idx]
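            # sample neighbors proportionally to their absolute correlation
            # with the target feature; each column of abs_corr_mat sums to 1
            # (see _get_abs_corr_mat), as `p` must for random_state_.choice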
            neighbor_feat_idx = self.random_state_.choice(
                np.arange(n_features), self.n_nearest_features, replace=False, p=p
            )
        else:
            inds_left = np.arange(feat_idx)
            inds_right = np.arange(feat_idx + 1, n_features)
            neighbor_feat_idx = np.concatenate((inds_left, inds_right))
        return neighbor_feat_idx

    def _get_ordered_idx(self, mask_missing_values):
        """Decide in what order we will update the features.

        As a homage to the MICE R package, there are five options for how to
        order the updates; any other value raises a `ValueError`.

        If `self.skip_complete` is `True`, this function also skips features
        which have no missing values.

        Parameters
        ----------
        mask_missing_values : array-like, shape (n_samples, n_features)
            Input data's missing indicator matrix, where `n_samples` is the
            number of samples and `n_features` is the number of features.

        Returns
        -------
        ordered_idx : ndarray, shape (n_features,)
            The order in which to impute the features.
        """
        frac_of_missing_values = mask_missing_values.mean(axis=0)
        if self.skip_complete:
            missing_values_idx = np.flatnonzero(frac_of_missing_values)
        else:
            missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])
        if self.imputation_order == "roman":
            ordered_idx = missing_values_idx
        elif self.imputation_order == "arabic":
            ordered_idx = missing_values_idx[::-1]
        elif self.imputation_order == "ascending":
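            # a stable argsort places the complete features (fraction 0)
            # first; slicing off the first `n` entries drops them when
            # skip_complete=True (otherwise n == 0)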
            n = len(frac_of_missing_values) - len(missing_values_idx)
            ordered_idx = np.argsort(frac_of_missing_values, kind="mergesort")[n:]
        elif self.imputation_order == "descending":
            n = len(frac_of_missing_values) - len(missing_values_idx)
            ordered_idx = np.argsort(frac_of_missing_values, kind="mergesort")[n:][::-1]
        elif self.imputation_order == "random":
            ordered_idx = missing_values_idx
            self.random_state_.shuffle(ordered_idx)
        else:
            raise ValueError(
                "Got an invalid imputation order: '{0}'. It must "
                "be one of the following: 'roman', 'arabic', "
                "'ascending', 'descending', or "
                "'random'.".format(self.imputation_order)
            )
        return ordered_idx

    def _get_abs_corr_mat(self, X_filled, tolerance=1e-6):
        """Get absolute correlation matrix between features.

        Parameters
        ----------
        X_filled : ndarray, shape (n_samples, n_features)
            Input data with the most recent imputations.

        tolerance : float, default=1e-6
            `abs_corr_mat` can have nans, which will be replaced
            with `tolerance`.

        Returns
        -------
        abs_corr_mat : ndarray, shape (n_features, n_features)
            Absolute correlation matrix of `X` at the beginning of the
            current round. The diagonal has been zeroed out and each feature's
            absolute correlations with all others have been normalized to sum
            to 1.
        """
        n_features = X_filled.shape[1]
        if self.n_nearest_features is None or self.n_nearest_features >= n_features:
            return None
        with np.errstate(invalid="ignore"):
            # if a feature in the neighborhood has only a single value
            # (e.g., categorical feature), the std. dev. will be null and
            # np.corrcoef will raise a warning due to a division by zero
            abs_corr_mat = np.abs(np.corrcoef(X_filled.T))
        # np.corrcoef is not defined for features with zero std
        abs_corr_mat[np.isnan(abs_corr_mat)] = tolerance
        # ensures exploration, i.e. at least some probability of sampling
        np.clip(abs_corr_mat, tolerance, None, out=abs_corr_mat)
        # features are not their own neighbors
        np.fill_diagonal(abs_corr_mat, 0)
        # needs to sum to 1 for np.random.choice sampling
        abs_corr_mat = normalize(abs_corr_mat, norm="l1", axis=0, copy=False)
        return abs_corr_mat

    def _initial_imputation(self, X, in_fit=False):
        """Perform initial imputation for input `X`.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        in_fit : bool, default=False
            Whether function is called in :meth:`fit`.

        Returns
        -------
        Xt : ndarray, shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        X_filled : ndarray, shape (n_samples, n_features)
            Input data with the most recent imputations.

        mask_missing_values : ndarray, shape (n_samples, n_features)
            Input data's missing indicator matrix, where `n_samples` is the
            number of samples and `n_features` is the number of features.

        X_missing_mask : ndarray, shape (n_samples, n_features)
            Input data's mask matrix indicating missing datapoints, where
            `n_samples` is the number of samples and `n_features` is the
            number of features.
        """
        if is_scalar_nan(self.missing_values):
            force_all_finite = "allow-nan"
        else:
            force_all_finite = True

        X = self._validate_data(
            X,
            dtype=FLOAT_DTYPES,
            order="F",
            reset=in_fit,
            force_all_finite=force_all_finite,
        )
        _check_inputs_dtype(X, self.missing_values)

        X_missing_mask = _get_mask(X, self.missing_values)
        mask_missing_values = X_missing_mask.copy()
        if self.initial_imputer_ is None:
            self.initial_imputer_ = SimpleImputer(
                missing_values=self.missing_values, strategy=self.initial_strategy
            )
            X_filled = self.initial_imputer_.fit_transform(X)
        else:
            X_filled = self.initial_imputer_.transform(X)

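        # features that were entirely missing at fit time have NaN statistics
        # in the SimpleImputer; drop them, as documented in the class Notes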
        valid_mask = np.flatnonzero(
            np.logical_not(np.isnan(self.initial_imputer_.statistics_))
        )
        Xt = X[:, valid_mask]
        mask_missing_values = mask_missing_values[:, valid_mask]

        return Xt, X_filled, mask_missing_values, X_missing_mask

    @staticmethod
    def _validate_limit(limit, limit_type, n_features):
        """Validate the limits (min/max) of the feature values.

        Converts scalar min/max limits to vectors of shape `(n_features,)`.

        Parameters
        ----------
        limit : scalar or array-like
            The user-specified limit (i.e., min_value or max_value).
        limit_type : {'max', 'min'}
            Type of limit to validate.
        n_features : int
            Number of features in the dataset.

        Returns
        -------
        limit : ndarray, shape (n_features,)
            Array of limits, one for each feature.
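
        Examples
        --------
        A minimal sketch of the broadcasting behavior (a private helper,
        shown for illustration only):

        >>> IterativeImputer._validate_limit(3.0, "max", 2)
        array([3., 3.])
        >>> IterativeImputer._validate_limit(None, "min", 2)
        array([-inf, -inf])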
        """
        limit_bound = np.inf if limit_type == "max" else -np.inf
        limit = limit_bound if limit is None else limit
        if np.isscalar(limit):
            limit = np.full(n_features, limit)
        limit = check_array(limit, force_all_finite=False, copy=False, ensure_2d=False)
        if limit.shape[0] != n_features:
            raise ValueError(
                f"'{limit_type}_value' should be of "
                f"shape ({n_features},) when an array-like "
                f"is provided. Got {limit.shape}, instead."
            )
        return limit

    def fit_transform(self, X, y=None):
        """Fit the imputer on `X` and return the transformed `X`.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        Xt : array-like, shape (n_samples, n_features)
            The imputed input data.
        """
        self.random_state_ = getattr(
            self, "random_state_", check_random_state(self.random_state)
        )

        if self.max_iter < 0:
            raise ValueError(
                "'max_iter' should be a non-negative integer. Got {} instead.".format(
                    self.max_iter
                )
            )

        if self.tol < 0:
            raise ValueError(
                "'tol' should be a non-negative float. Got {} instead.".format(self.tol)
            )

        if self.estimator is None:
            from ..linear_model import BayesianRidge

            self._estimator = BayesianRidge()
        else:
            self._estimator = clone(self.estimator)

        self.imputation_sequence_ = []

        self.initial_imputer_ = None

        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(
            X, in_fit=True
        )

        super()._fit_indicator(complete_mask)
        X_indicator = super()._transform_indicator(complete_mask)

        if self.max_iter == 0 or np.all(mask_missing_values):
            self.n_iter_ = 0
            return super()._concatenate_indicator(Xt, X_indicator)

        # Edge case: a single feature. We return the initial imputation.
        if Xt.shape[1] == 1:
            self.n_iter_ = 0
            return super()._concatenate_indicator(Xt, X_indicator)

        self._min_value = self._validate_limit(self.min_value, "min", X.shape[1])
        self._max_value = self._validate_limit(self.max_value, "max", X.shape[1])

        if not np.all(np.greater(self._max_value, self._min_value)):
            raise ValueError("One (or more) features have min_value >= max_value.")

        # order in which to impute
        # note this is probably too slow for large feature data (d > 100000)
        # and a better way would be good.
        # see: https://goo.gl/KyCNwj and subsequent comments
        ordered_idx = self._get_ordered_idx(mask_missing_values)
        self.n_features_with_missing_ = len(ordered_idx)

        abs_corr_mat = self._get_abs_corr_mat(Xt)

        n_samples, n_features = Xt.shape
        if self.verbose > 0:
            print("[IterativeImputer] Completing matrix with shape %s" % (X.shape,))
        start_t = time()
        if not self.sample_posterior:
            Xt_previous = Xt.copy()
            normalized_tol = self.tol * np.max(np.abs(X[~mask_missing_values]))
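            # scale the tolerance by the largest observed magnitude so the
            # check below implements max(abs(X_t - X_{t-1})) <
            # tol * max(abs(X[known_vals])), as documented under `max_iter`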
        for self.n_iter_ in range(1, self.max_iter + 1):
            if self.imputation_order == "random":
                ordered_idx = self._get_ordered_idx(mask_missing_values)

            for feat_idx in ordered_idx:
                neighbor_feat_idx = self._get_neighbor_feat_idx(
                    n_features, feat_idx, abs_corr_mat
                )
                Xt, estimator = self._impute_one_feature(
                    Xt,
                    mask_missing_values,
                    feat_idx,
                    neighbor_feat_idx,
                    estimator=None,
                    fit_mode=True,
                )
                estimator_triplet = _ImputerTriplet(
                    feat_idx, neighbor_feat_idx, estimator
                )
                self.imputation_sequence_.append(estimator_triplet)

            if self.verbose > 1:
                print(
                    "[IterativeImputer] Ending imputation round "
                    "%d/%d, elapsed time %0.2f"
                    % (self.n_iter_, self.max_iter, time() - start_t)
                )

            if not self.sample_posterior:
                inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf, axis=None)
                if self.verbose > 0:
                    print(
                        "[IterativeImputer] Change: {}, scaled tolerance: {} ".format(
                            inf_norm, normalized_tol
                        )
                    )
                if inf_norm < normalized_tol:
                    if self.verbose > 0:
                        print("[IterativeImputer] Early stopping criterion reached.")
                    break
                Xt_previous = Xt.copy()
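        # the `else` clause of the `for` loop runs only when the loop was not
        # broken out of, i.e. when early stopping was never triggered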
        else:
            if not self.sample_posterior:
                warnings.warn(
                    "[IterativeImputer] Early stopping criterion not reached.",
                    ConvergenceWarning,
                )
        Xt[~mask_missing_values] = X[~mask_missing_values]
        return super()._concatenate_indicator(Xt, X_indicator)

    def transform(self, X):
        """Impute all missing values in `X`.

        Note that this is stochastic: if `random_state` is not fixed,
        repeated calls or permuted input will yield different results.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data to complete.

        Returns
        -------
        Xt : array-like, shape (n_samples, n_features)
            The imputed input data.
        """
        check_is_fitted(self)

        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(X)

        X_indicator = super()._transform_indicator(complete_mask)

        if self.n_iter_ == 0 or np.all(mask_missing_values):
            return super()._concatenate_indicator(Xt, X_indicator)

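        # replay the estimators fitted during `fit`, in the same order; every
        # `imputations_per_round` triplets make up one full imputation round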
        imputations_per_round = len(self.imputation_sequence_) // self.n_iter_
        i_rnd = 0
        if self.verbose > 0:
            print("[IterativeImputer] Completing matrix with shape %s" % (X.shape,))
        start_t = time()
        for it, estimator_triplet in enumerate(self.imputation_sequence_):
            Xt, _ = self._impute_one_feature(
                Xt,
                mask_missing_values,
                estimator_triplet.feat_idx,
                estimator_triplet.neighbor_feat_idx,
                estimator=estimator_triplet.estimator,
                fit_mode=False,
            )
            if not (it + 1) % imputations_per_round:
                if self.verbose > 1:
                    print(
                        "[IterativeImputer] Ending imputation round "
                        "%d/%d, elapsed time %0.2f"
                        % (i_rnd + 1, self.n_iter_, time() - start_t)
                    )
                i_rnd += 1

        Xt[~mask_missing_values] = X[~mask_missing_values]

        return super()._concatenate_indicator(Xt, X_indicator)

    def fit(self, X, y=None):
        """Fit the imputer on `X` and return self.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        self.fit_transform(X)
        return self