[MRG] Add an example of inductive clustering #10852
Changes from all commits: 2ff7a38, 3bdaacb, a7e0f36, d9507b9, 7ae89c8, 090bbfe
@@ -0,0 +1,118 @@
""" | ||
============================================== | ||
Inductive Clustering | ||
============================================== | ||
|
||
Clustering can be expensive, especially when our dataset contains millions | ||
of datapoints. Many clustering algorithms are not :term:`inductive` and so | ||
cannot be directly applied to new data samples without recomputing the | ||
clustering, which may be intractable. Instead, we can use clustering to then | ||
learn an inductive model with a classifier, which has several benefits: | ||
|
||
- it allows the clusters to scale and apply to new data | ||
- unlike re-fitting the clusters to new samples, it makes sure the labelling | ||
procedure is consistent over time | ||
- it allows us to use the inferential capabilities of the classifier to | ||
describe or explain the clusters | ||
|
||
This example illustrates a generic implementation of a meta-estimator which | ||
extends clustering by inducing a classifier from the cluster labels. | ||
""" | ||
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, clone
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.metaestimators import if_delegate_has_method


N_SAMPLES = 5000
RANDOM_STATE = 42
class InductiveClusterer(BaseEstimator):
    def __init__(self, clusterer, classifier):
        self.clusterer = clusterer
        self.classifier = classifier

    def fit(self, X, y=None):
        # Clone the estimators so that repeated calls to fit do not
        # modify the objects passed to __init__
        self.clusterer_ = clone(self.clusterer)
        self.classifier_ = clone(self.classifier)
        y = self.clusterer_.fit_predict(X)
        self.classifier_.fit(X, y)
        return self

    # predict and decision_function are only exposed when the wrapped
    # classifier provides the corresponding method
    @if_delegate_has_method(delegate='classifier_')
    def predict(self, X):
        return self.classifier_.predict(X)

    @if_delegate_has_method(delegate='classifier_')
    def decision_function(self, X):
        return self.classifier_.decision_function(X)


def plot_scatter(X, color, alpha=0.5):
    return plt.scatter(X[:, 0],
                       X[:, 1],
                       c=color,
                       alpha=alpha,
                       edgecolor='k')
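The meta-estimator is generic: any clusterer with fit_predict and any
classifier can be plugged in. A hypothetical pairing (not part of the PR)
showing why the conditional delegation matters, since LogisticRegression
exposes decision_function while RandomForestClassifier does not:

# Hypothetical pairing: KMeans labels induced into a LogisticRegression.
# decision_function surfaces here because the classifier provides it.
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression

X_demo, _ = make_blobs(n_samples=100, centers=3, random_state=0)
demo = InductiveClusterer(KMeans(n_clusters=3, random_state=0),
                          LogisticRegression()).fit(X_demo)
print(demo.predict(X_demo[:5]))
print(demo.decision_function(X_demo[:5]).shape)  # (5, 3)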
# Generate some training data from clustering
X, y = make_blobs(n_samples=N_SAMPLES,
                  cluster_std=[1.0, 1.0, 0.5],
                  centers=[(-5, -5), (0, 0), (5, 5)],
                  random_state=RANDOM_STATE)


# Train a clustering algorithm on the training data and get the cluster labels
clusterer = AgglomerativeClustering(n_clusters=3)
cluster_labels = clusterer.fit_predict(X)

plt.figure(figsize=(12, 4))

plt.subplot(131)
Review comment: Please add before this line
Reply: It did the trick!
plot_scatter(X, cluster_labels)
plt.title("Ward Linkage")
# Generate new samples and plot them along with the original dataset
X_new, y_new = make_blobs(n_samples=10,
                          centers=[(-7, -1), (-2, 4), (3, 6)],
                          random_state=RANDOM_STATE)

Review comment: Hmm... Should we be drawing samples from a completely different distribution, rather than drawing a test set from the same generation procedure (or even real-world data)?
Reply: I don't have a strong opinion about that. I think the intention of the example is clearly conveyed. If you want something more specific, I am all ears.
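A sketch of the reviewer's alternative, drawing the held-out points from the
same generative procedure as the training data (hypothetical variant, not
part of the PR; only the seed changes):

# Hypothetical variant: sample the test set from the same blob centers as
# the training data, with a different random seed.
X_same, y_same = make_blobs(n_samples=10,
                            cluster_std=[1.0, 1.0, 0.5],
                            centers=[(-5, -5), (0, 0), (5, 5)],
                            random_state=RANDOM_STATE + 1)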
plt.subplot(132)
plot_scatter(X, cluster_labels)
plot_scatter(X_new, 'black', 1)
plt.title("Unknown instances")
# Declare the inductive learning model that will be used to
# predict cluster membership for unknown instances
classifier = RandomForestClassifier(random_state=RANDOM_STATE)
inductive_learner = InductiveClusterer(clusterer, classifier).fit(X)

probable_clusters = inductive_learner.predict(X_new)
plt.subplot(133)
plot_scatter(X, cluster_labels)
plot_scatter(X_new, probable_clusters)

# Plotting decision regions
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

Z = inductive_learner.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.4)
plt.title("Classify unknown instances")

plt.show()
Review comment: remove double whitespace
Reply: That was on purpose. Shouldn't we have two blank lines between top-level functions, as per PEP 8?
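For readers on newer scikit-learn: if_delegate_has_method was later
deprecated in favour of available_if. A sketch of the same conditional
delegation under that API (assumes scikit-learn >= 1.0; the helper name
_classifier_has is illustrative, not part of this PR):

# Sketch assuming scikit-learn >= 1.0, where available_if replaces
# if_delegate_has_method. The check runs on attribute access.
from sklearn.base import BaseEstimator, clone
from sklearn.utils.metaestimators import available_if


def _classifier_has(attr):
    # Illustrative helper: delegate only if the fitted classifier
    # actually provides the requested method.
    def check(self):
        return hasattr(self, "classifier_") and hasattr(self.classifier_, attr)
    return check


class InductiveClusterer(BaseEstimator):
    def __init__(self, clusterer, classifier):
        self.clusterer = clusterer
        self.classifier = classifier

    def fit(self, X, y=None):
        self.clusterer_ = clone(self.clusterer)
        self.classifier_ = clone(self.classifier)
        self.classifier_.fit(X, self.clusterer_.fit_predict(X))
        return self

    @available_if(_classifier_has("predict"))
    def predict(self, X):
        return self.classifier_.predict(X)

    @available_if(_classifier_has("decision_function"))
    def decision_function(self, X):
        return self.classifier_.decision_function(X)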