scikit-learn · jnothman · May 25, 2018 · Dec 26, 2015 · Aug 11, 2016 · Aug 11, 2016
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
@@ -955,6 +955,7 @@ See the :ref:`metrics` section of the user guide for further details.
    metrics.pairwise_distances
    metrics.pairwise_distances_argmin
    metrics.pairwise_distances_argmin_min
+   metrics.pairwise_distances_chunked
 
 
 .. _mixture_ref:

diff --git a/doc/modules/computational_performance.rst b/doc/modules/computational_performance.rst
@@ -308,6 +308,27 @@ Debian / Ubuntu.
     or upgrade to Python 3.4 which has a new version of ``multiprocessing``
     that should be immune to this problem.
 
+.. _working_memory:
+
+Limiting Working Memory
+-----------------------
+
+Some calculations when implemented using standard numpy vectorized operations
+involve using a large amount of temporary memory.  This may potentially exhaust
+system memory.  Where computations can be performed in fixed-memory chunks, we
+attempt to do so, and allow the user to hint at the maximum size of this
+working memory (defaulting to 1GiB) using :func:`sklearn.set_config` or
+:func:`sklearn.config_context`.  The following example suggests limiting
+temporary working memory to 128 MiB::
+
+  >>> import sklearn
+  >>> with sklearn.config_context(working_memory=128):
+  ...    pass  # do chunked work here
+
+An example of a chunked operation adhering to this setting is
+:func:`sklearn.metrics.pairwise_distances_chunked`, which facilitates computing
+row-wise reductions of a pairwise distance matrix.
+
 Model Compression
 -----------------
 

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -56,6 +56,9 @@ Classifiers and regressors
 - :class:`dummy.DummyRegressor` now has a ``return_std`` option in its
   ``predict`` method. The returned standard deviations will be zeros.
 
+- Added :class:`multioutput.RegressorChain` for multi-target
+  regression. :issue:`9257` by :user:`Kumar Ashutosh <thechargedneutron>`.
+
 - Added :class:`naive_bayes.ComplementNB`, which implements the Complement
   Naive Bayes classifier described in Rennie et al. (2003).
   :issue:`8190` by :user:`Michael A. Alcorn <airalcorn2>`.
@@ -115,6 +118,13 @@ Metrics
   :func:`metrics.roc_auc_score`. :issue:`3273` by
   :user:`Alexander Niederbühl <Alexander-N>`.
 
+Misc
+
+- A new configuration parameter, ``working_memory``, was added to control
+  memory consumption limits in chunked operations, such as the new
+  :func:`metrics.pairwise_distances_chunked`.  See :ref:`working_memory`.
+  :issue:`10280` by `Joel Nothman`_ and :user:`Aman Dalmia <dalmia>`.
+
 Enhancements
 ............
 
@@ -521,6 +531,12 @@ Metrics
   due to floating point error in the input.
   :issue:`9851` by :user:`Hanmin Qin <qinhanmin2014>`.
 
+- The ``batch_size`` parameter to :func:`metrics.pairwise_distances_argmin_min`
+  and :func:`metrics.pairwise_distances_argmin` is deprecated and will be
+  removed in v0.22.  It no longer has any effect, as batch size is determined
+  by the global ``working_memory`` config. See :ref:`working_memory`.
+  :issue:`10280` by `Joel Nothman`_ and :user:`Aman Dalmia <dalmia>`.
+
 Cluster
 
 - Deprecate ``pooling_func`` unused parameter in

diff --git a/sklearn/_config.py b/sklearn/_config.py
@@ -5,6 +5,7 @@
 
 _global_config = {
     'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)),
+    'working_memory': int(os.environ.get('SKLEARN_WORKING_MEMORY', 1024))
 }
 
 
@@ -19,7 +20,7 @@ def get_config():
     return _global_config.copy()
 
 
-def set_config(assume_finite=None):
+def set_config(assume_finite=None, working_memory=None):
     """Set global scikit-learn configuration
 
     Parameters
@@ -29,9 +30,17 @@ def set_config(assume_finite=None):
         saving time, but leading to potential crashes. If
         False, validation for finiteness will be performed,
         avoiding error.  Global default: False.
+
+    working_memory : int, optional
+        If set, scikit-learn will attempt to limit the size of temporary arrays
+        to this number of MiB (per job when parallelised), often saving both
+        computation time and memory on expensive operations that can be
+        performed in chunks. Global default: 1024.
     """
     if assume_finite is not None:
         _global_config['assume_finite'] = assume_finite
+    if working_memory is not None:
+        _global_config['working_memory'] = working_memory
 
 
 @contextmanager
@@ -46,6 +55,12 @@ def config_context(**new_config):
         False, validation for finiteness will be performed,
         avoiding error.  Global default: False.
 
+    working_memory : int, optional
+        If set, scikit-learn will attempt to limit the size of temporary arrays
+        to this number of MiB (per job when parallelised), often saving both
+        computation time and memory on expensive operations that can be
+        performed in chunks. Global default: 1024.
+
     Notes
     -----
     All settings, not just those presently modified, will be returned to

diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
@@ -51,6 +51,7 @@
 from .pairwise import pairwise_distances_argmin
 from .pairwise import pairwise_distances_argmin_min
 from .pairwise import pairwise_kernels
+from .pairwise import pairwise_distances_chunked
 
 from .regression import explained_variance_score
 from .regression import mean_absolute_error
@@ -106,6 +107,7 @@
     'pairwise_distances_argmin',
     'pairwise_distances_argmin_min',
     'pairwise_distances_argmin_min',
+    'pairwise_distances_chunked',
     'pairwise_kernels',
     'precision_recall_curve',
     'precision_recall_fscore_support',