scikit-learn · rth · Sep 6, 2019 · Apr 12, 2020 · Apr 12, 2020 · Apr 13, 2020
diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst
@@ -5,9 +5,9 @@ Developing scikit-learn estimators
 ==================================
 
 Whether you are proposing an estimator for inclusion in scikit-learn,
-developing a separate package compatible with scikit-learn, or 
-implementing custom components for your own projects, this chapter 
-details how to develop objects that safely interact with scikit-learn 
+developing a separate package compatible with scikit-learn, or
+implementing custom components for your own projects, this chapter
+details how to develop objects that safely interact with scikit-learn
 Pipelines and model selection tools.
 
 .. currentmodule:: sklearn
@@ -576,10 +576,10 @@ closed-form solutions.
 Coding guidelines
 =================
 
-The following are some guidelines on how new code should be written for 
-inclusion in scikit-learn, and which may be appropriate to adopt in external 
-projects. Of course, there are special cases and there will be exceptions to 
-these rules. However, following these rules when submitting new code makes 
+The following are some guidelines on how new code should be written for
+inclusion in scikit-learn, and which may be appropriate to adopt in external
+projects. Of course, there are special cases and there will be exceptions to
+these rules. However, following these rules when submitting new code makes
 the review easier so new code can be integrated in less time.
 
 Uniformly formatted code makes it easier to share code ownership. The
@@ -709,3 +709,95 @@ The reason for this setup is reproducibility:
 when an estimator is ``fit`` twice to the same data,
 it should produce an identical model both times,
 hence the validation in ``fit``, not ``__init__``.
+
+Estimator callbacks
+===================
+
+.. note:: Callbacks are currently experimental and the API can change without
+          notice.
+
+To add (optional) support for callbacks, for instance to display progress
+bars or to monitor convergence, the estimator must implement the following
+points:
+
+- At the beginning of ``fit`` either explicitly call
+  ``self._eval_callbacks(method='on_fit_begin', X=X, y=y)``
+  or use ``self._validate_data(X, y)`` which makes this call internally.
+- For iterative solvers call ``self._eval_callbacks(n_iter=.., **kwargs)`` at
+  each iteration, where ``kwargs`` keys must be part of supported callback
+  arguments (cf. list below).
+
+User-defined callbacks must extend the ``sklearn._callbacks.BaseCallback``
+abstract base class and define the following methods:
+
+.. code:: python
+
+    from sklearn._callbacks import BaseCallback
+
+    class MyCallback(BaseCallback):
+
+        def on_fit_begin(self, estimator, X, y):
+            ...
+
+        def on_iter_end(self, **kwargs):
+            ...
+
+
+For more details, see the Callback API documentation below.
+
+Callback API
+------------
+
+on_fit_begin method
+^^^^^^^^^^^^^^^^^^^
+
+Parameters
+""""""""""
+
+X: array_like
+  Training data
+
+y: array_like
+  Target values
+
+Returns
+"""""""
+
+Any. The return value is ignored.
+
+on_iter_end method
+^^^^^^^^^^^^^^^^^^
+
+Parameters
+""""""""""
+
+n_iter: int
+  current iteration number for iterative solvers.
+
+max_iter: int
+  maximum number of iterations for iterative solvers. If the estimator
+  has a ``max_iter`` init parameter, this will be inferred.
+
+loss: float or ordered dict
+  cost function value or error at a given iteration. When ordered dict,
+  multiple loss functions can be given, with the default loss being the first
+  element. Lower is better.
+
+score: float or ordered dict
+  same as ``loss`` parameter, but for evaluation metrics. Higher is better.
+
+validation_loss: float or ordered dict
+  cost function value or error at a given iteration, evaluated on the
+  validation set.
+
+validation_score: float or ordered dict
+  same as ``validation_loss`` parameter, but for evaluation metrics. Higher is
+  better.
+
+coef: ndarray
+  coefficients of linear models.
+
+Returns
+"""""""
+
+Any. The return value is ignored.
diff --git a/pyproject.toml b/pyproject.toml
@@ -13,3 +13,6 @@ requires = [
     "numpy==1.17.3; python_version>='3.8' and platform_system=='AIX'",
     "scipy>=0.19.1",
 ]
+
+[tool.black]
+line-length = 79
diff --git a/sklearn/_callbacks.py b/sklearn/_callbacks.py
@@ -0,0 +1,57 @@
+# License: BSD 3 clause
+from typing import List, Callable, Optional
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+CALLBACK_PARAM_TYPES = {  # callback keyword name -> accepted value type(s)
+    "n_iter": int,
+    "max_iter": int,
+    "loss": (float, dict),
+    "score": (float, dict),
+    "validation_loss": (float, dict),
+    "validation_score": (float, dict),
+    "coef": np.ndarray,
+    "intercept": (np.ndarray, float),
+}
+
+
+def _check_callback_params(**kwargs):  # raise ValueError on bad kwargs
+    invalid_params = []  # names that are not keys of CALLBACK_PARAM_TYPES
+    invalid_types = []  # messages for known names with a wrong value type
+    for key, val in kwargs.items():
+        if key not in CALLBACK_PARAM_TYPES:
+            invalid_params.append(key)
+        else:
+            val_types = CALLBACK_PARAM_TYPES[key]
+            if not isinstance(val, val_types):
+                invalid_types.append(f"{key}={val} is not of type {val_types}")
+    msg = ""  # unknown names and bad types are reported in one error
+    if invalid_params:
+        msg += ("Invalid callback parameters: {}, must be one of {}. ").format(
+            ", ".join(invalid_params), ", ".join(CALLBACK_PARAM_TYPES.keys())
+        )
+    if invalid_types:
+        msg += "Invalid callback parameters: " + ", ".join(invalid_types)
+    if msg:  # stays empty (no exception) when every kwarg is valid
+        raise ValueError(msg)
+
+
+def _eval_callbacks(
+    callbacks: Optional[List[Callable]], method="on_iter_end", **kwargs
+) -> None:  # call ``method`` on every callback, forwarding ``**kwargs``
+    if callbacks is None:  # no callbacks given: nothing to do
+        return
+
+    for callback in callbacks:
+        getattr(callback, method)(**kwargs)  # return value is discarded
+
+
+class BaseCallback(ABC):  # base class that user-defined callbacks extend
+    @abstractmethod  # both hooks are abstract: subclasses must define them
+    def on_fit_begin(self, estimator, X, y) -> None:
+        pass  # hook for the start of ``fit``; receives estimator, X, y
+
+    @abstractmethod
+    def on_iter_end(self, **kwargs) -> None:
+        pass  # per-iteration hook; receives keyword arguments only
diff --git a/sklearn/base.py b/sklearn/base.py
@@ -14,6 +14,7 @@
 
 from . import __version__
 from ._config import get_config
+from sklearn._callbacks import BaseCallback
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
@@ -84,6 +85,11 @@ def clone(estimator, *, safe=True):
     new_object = klass(**new_object_params)
     params_set = new_object.get_params(deep=False)
 
+    # copy callbacks
+    if hasattr(estimator, "_callbacks"):
+        # TODO: do we need to use the recursive setter here?
+        new_object._callbacks = estimator._callbacks
+
     # quick sanity check of the parameters of the clone
     for name in new_object_params:
         param1 = new_object_params[name]
@@ -406,6 +412,7 @@ def _validate_data(self, X, y=None, reset=True,
         out : {ndarray, sparse matrix} or tuple of these
             The validated input. A tuple is returned if `y` is not None.
         """
+        self._eval_callbacks('on_fit_begin', X=X, y=y)
 
         if y is None:
             if self._get_tags()['requires_y']:
@@ -433,6 +440,46 @@ def _validate_data(self, X, y=None, reset=True,
 
         return out
 
+    def _set_callbacks(self, callbacks, deep: bool = True):
+        """Set callbacks for the estimator.
+
+        Parameters
+        ----------
+        callbacks : BaseCallback or list of BaseCallback
+            The callbacks to set; a single callback is wrapped in a list.
+
+        deep : bool, default=True
+            If True, callbacks are also set recursively on child estimators
+            listed in the ``steps`` or ``transformers`` parameter.
+        """
+        if isinstance(callbacks, BaseCallback):
+            self._callbacks = [callbacks]
+        else:
+            self._callbacks = callbacks
+
+        if not deep:
+            return
+
+        # propagate to child estimators (each call recurses with deep=True)
+        for attr_name in getattr(self, "_required_parameters", []):
+            # likely a meta-estimator: only these two parameters are searched
+            if attr_name in ['steps', 'transformers']:
+                for attr in getattr(self, attr_name):
+                    if isinstance(attr, BaseEstimator):
+                        attr._set_callbacks(callbacks)
+                    elif (hasattr(attr, '__len__')
+                          and len(attr) >= 2  # estimator sits at index 1
+                          and isinstance(attr[1], BaseEstimator)):
+                        attr[1]._set_callbacks(callbacks)
+
+    def _eval_callbacks(self, method='on_iter_end', **kwargs):
+        """Call ``method`` on each callback, passing ``estimator=self``."""
+        from ._callbacks import _eval_callbacks
+
+        callbacks = getattr(self, '_callbacks', None)  # None if never set
+
+        _eval_callbacks(callbacks, method=method, estimator=self, **kwargs)
+
     @property
     def _repr_html_(self):
         """HTML representation of estimator.

diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
@@ -516,6 +516,7 @@ def fit_transform(self, X, y=None):
             sparse matrices.
 
         """
+        self._eval_callbacks(method='on_fit_begin', X=X, y=y)
         # TODO: this should be `feature_names_in_` when we start having it
         if hasattr(X, "columns"):
             self._feature_names_in = np.asarray(X.columns)

diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py
@@ -236,6 +236,7 @@ def my_svd(X):
             old_ll = ll
 
             psi = np.maximum(var - np.sum(W ** 2, axis=0), SMALL)
+            self._eval_callbacks(n_iter=i)
         else:
             warnings.warn('FactorAnalysis did not converge.' +
                           ' You might want' +

diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py
@@ -206,12 +206,14 @@ def fit(self, X, y=None):
         else:
             self.batch_size_ = self.batch_size
 
-        for batch in gen_batches(n_samples, self.batch_size_,
-                                 min_batch_size=self.n_components or 0):
+        for n_batch, batch in enumerate(
+                gen_batches(n_samples, self.batch_size_,
+                            min_batch_size=self.n_components or 0)):
             X_batch = X[batch]
             if sparse.issparse(X_batch):
                 X_batch = X_batch.toarray()
             self.partial_fit(X_batch, check_input=False)
+            self._eval_callbacks(n_iter=n_batch)
 
         return self
 

diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py
@@ -464,6 +464,7 @@ def _em_step(self, X, total_samples, batch_update, parallel=None):
         self.exp_dirichlet_component_ = np.exp(
             _dirichlet_expectation_2d(self.components_))
         self.n_batch_iter_ += 1
+        self._eval_callbacks()
         return
 
     def _more_tags(self):

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
@@ -19,6 +19,7 @@
 from ..utils import check_random_state, check_array
 from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
 from ..utils.validation import check_is_fitted, check_non_negative
+from .._callbacks import _eval_callbacks
 from ..utils.validation import _deprecate_positional_args
 
 EPSILON = np.finfo(np.float32).eps
@@ -426,7 +427,8 @@ def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle,
 
 def _fit_coordinate_descent(X, W, H, tol=1e-4, max_iter=200, l1_reg_W=0,
                             l1_reg_H=0, l2_reg_W=0, l2_reg_H=0, update_H=True,
-                            verbose=0, shuffle=False, random_state=None):
+                            verbose=0, shuffle=False, random_state=None,
+                            callbacks=None):
     """Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent
 
     The objective function is minimized with an alternating minimization of W
@@ -522,6 +524,10 @@ def _fit_coordinate_descent(X, W, H, tol=1e-4, max_iter=200, l1_reg_W=0,
         if verbose:
             print("violation:", violation / violation_init)
 
+        _eval_callbacks(callbacks, n_iter=n_iter,
+                        tol=violation/violation_init,
+                        error=violation)
+
         if violation / violation_init <= tol:
             if verbose:
                 print("Converged at iteration", n_iter + 1)
@@ -710,7 +716,7 @@ def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
 def _fit_multiplicative_update(X, W, H, beta_loss='frobenius',
                                max_iter=200, tol=1e-4,
                                l1_reg_W=0, l1_reg_H=0, l2_reg_W=0, l2_reg_H=0,
-                               update_H=True, verbose=0):
+                               update_H=True, verbose=0, callbacks=None):
     """Compute Non-negative Matrix Factorization with Multiplicative Update
 
     The objective function is _beta_divergence(X, WH) and is minimized with an
@@ -828,6 +834,9 @@ def _fit_multiplicative_update(X, W, H, beta_loss='frobenius',
                 print("Epoch %02d reached after %.3f seconds, error: %f" %
                       (n_iter, iter_time - start_time, error))
 
+            _eval_callbacks(callbacks, n_iter=n_iter, error=error,
+                            tol=(previous_error - error) / error_at_init)
+
             if (previous_error - error) / error_at_init < tol:
                 break
             previous_error = error
@@ -847,7 +856,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
                                beta_loss='frobenius', tol=1e-4,
                                max_iter=200, alpha=0., l1_ratio=0.,
                                regularization=None, random_state=None,
-                               verbose=0, shuffle=False):
+                               verbose=0, shuffle=False, callbacks=None):
     r"""Compute Non-negative Matrix Factorization (NMF)
 
     Find two non-negative matrices (W, H) whose product approximates the non-
@@ -1062,12 +1071,13 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
                                                update_H=update_H,
                                                verbose=verbose,
                                                shuffle=shuffle,
-                                               random_state=random_state)
+                                               random_state=random_state,
+                                               callbacks=callbacks)
     elif solver == 'mu':
         W, H, n_iter = _fit_multiplicative_update(X, W, H, beta_loss, max_iter,
                                                   tol, l1_reg_W, l1_reg_H,
                                                   l2_reg_W, l2_reg_H, update_H,
-                                                  verbose)
+                                                  verbose, callbacks=callbacks)
 
     else:
         raise ValueError("Invalid solver parameter '%s'." % solver)
@@ -1286,7 +1296,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
             tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
             l1_ratio=self.l1_ratio, regularization='both',
             random_state=self.random_state, verbose=self.verbose,
-            shuffle=self.shuffle)
+            shuffle=self.shuffle, callbacks=getattr(self, "_callbacks", None))
 
         self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,
                                                     square_root=True)
@@ -1335,7 +1345,7 @@ def transform(self, X):
             beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,
             alpha=self.alpha, l1_ratio=self.l1_ratio, regularization='both',
             random_state=self.random_state, verbose=self.verbose,
-            shuffle=self.shuffle)
+            shuffle=self.shuffle, callbacks=getattr(self, '_callbacks', None))
 
         return W