scikit-learn
diff --git a/‎.travis.yml
Lines changed: 34 additions & 36 deletions b/‎.travis.yml
Lines changed: 34 additions & 36 deletions
diff --git a/‎benchmarks/bench_plot_nmf.py
Lines changed: 2 additions & 2 deletions b/‎benchmarks/bench_plot_nmf.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/conf.py
Lines changed: 2 additions & 3 deletions b/‎doc/conf.py
Lines changed: 2 additions & 3 deletions
diff --git a/‎doc/developers/performance.rst
Lines changed: 0 additions & 33 deletions b/‎doc/developers/performance.rst
Lines changed: 0 additions & 33 deletions
diff --git a/‎doc/faq.rst
Lines changed: 2 additions & 1 deletion b/‎doc/faq.rst
Lines changed: 2 additions & 1 deletion
diff --git a/‎doc/modules/classes.rst
Lines changed: 3 additions & 0 deletions b/‎doc/modules/classes.rst
Lines changed: 3 additions & 0 deletions
diff --git a/‎doc/modules/computational_performance.rst
Lines changed: 19 additions & 0 deletions b/‎doc/modules/computational_performance.rst
Lines changed: 19 additions & 0 deletions
diff --git a/‎doc/modules/cross_validation.rst
Lines changed: 15 additions & 15 deletions b/‎doc/modules/cross_validation.rst
Lines changed: 15 additions & 15 deletions
diff --git a/‎doc/modules/grid_search.rst
Lines changed: 1 addition & 1 deletion b/‎doc/modules/grid_search.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/modules/model_evaluation.rst
Lines changed: 1 addition & 1 deletion b/‎doc/modules/model_evaluation.rst
Lines changed: 1 addition & 1 deletion
@@ -3,62 +3,60 @@ sudo: false
 
 language: python
 
-# Pre-install packages for the ubuntu distribution
 cache:
   apt: true
   directories:
   - $HOME/.cache/pip
-addons:
-  apt:
-    packages:
-      # these only required by the DISTRIB="ubuntu" builds:
-      - python-scipy
-      - libatlas3gf-base
-      - libatlas-dev
+
 dist: trusty
+
 env:
   global:
     # Directory where tests are run from
     - TEST_DIR=/tmp/sklearn
     - OMP_NUM_THREADS=4
     - OPENBLAS_NUM_THREADS=4
-  matrix:
+
+matrix:
+  include:
     # This environment tests that scikit-learn can be built against the
     # versions of numpy and scipy (with ATLAS) that come with Ubuntu Trusty 14.04
-    - DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
-      COVERAGE=true
+    - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
+           COVERAGE=true
+      addons:
+        apt:
+          packages:
+            # these are only required by the DISTRIB="ubuntu" builds:
+            - python-scipy
+            - libatlas3gf-base
+            - libatlas-dev
     # This environment tests the oldest supported anaconda env
-    - DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
-      NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
-      COVERAGE=true
-    # This environment tests the newest supported Anaconda release (4.3.1)
+    - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
+           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.5"
+           COVERAGE=true
+    # This environment tests the newest supported Anaconda release (4.4.0)
     # It also runs tests requiring Pandas.
-    - DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-      NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
-      CYTHON_VERSION="0.25.2" COVERAGE=true
+    - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true"
+           NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1"
+           CYTHON_VERSION="0.25.2" COVERAGE=true
     # This environment uses pytest to run the tests. It uses the newest
-    # supported Anaconda release (4.3.1). It also runs tests requiring Pandas.
-    - USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-      NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
-      CYTHON_VERSION="0.25.2"
+    # supported Anaconda release (4.4.0). It also runs tests requiring Pandas.
+    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1"
+    #        INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0"
+    #        PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2"
     # flake8 linting on diff wrt common ancestor with upstream/master
-    - RUN_FLAKE8="true" SKIP_TESTS="true"
-      DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
-      NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
-
-
-matrix:
-  allow_failures:
-    # allow_failures seems to be keyed on the python version
-    # We are using this to allow failures for DISTRIB=scipy-dev-wheels
-    - python: 3.5
-
-  include:
+    - env: RUN_FLAKE8="true" SKIP_TESTS="true"
+           DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
+           NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5"
     # This environment tests scikit-learn against numpy and scipy master
     # installed from their CI wheels in a virtualenv with the Python
     # interpreter provided by travis.
-    -  python: 3.5
-       env: DISTRIB="scipy-dev-wheels"
+  #   -  python: 3.5
+  #      env: DISTRIB="scipy-dev-wheels"
+  # allow_failures:
+  #   # allow_failures seems to be keyed on the python version
+  #   # We are using this to allow failures for DISTRIB=scipy-dev-wheels
+  #   - python: 3.5
 
 install: source build_tools/travis/install.sh
 script: bash build_tools/travis/test_script.sh
 
@@ -24,7 +24,7 @@
 from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
 from sklearn.externals.joblib import Memory
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils.extmath import fast_dot, safe_sparse_dot, squared_norm
+from sklearn.utils.extmath import safe_sparse_dot, squared_norm
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted, check_non_negative
 
@@ -99,7 +99,7 @@ def _nls_subproblem(X, W, H, tol, max_iter, alpha=0., l1_ratio=0.,
     http://www.csie.ntu.edu.tw/~cjlin/nmf/
     """
     WtX = safe_sparse_dot(W.T, X)
-    WtW = fast_dot(W.T, W)
+    WtW = np.dot(W.T, W)
 
     # values justified in the paper (alpha is renamed gamma)
     gamma = 1
 
@@ -236,13 +236,12 @@
 
 sphinx_gallery_conf = {
     'doc_module': 'sklearn',
+    'backreferences_dir': os.path.join('modules', 'generated'),
     'reference_url': {
         'sklearn': None,
         'matplotlib': 'http://matplotlib.org',
         'numpy': 'http://docs.scipy.org/doc/numpy-1.8.1',
-        'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'},
-    'expected_failing_examples': [
-        '../examples/applications/plot_stock_market.py']
+        'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'}
 }
 
 
 
@@ -84,38 +84,6 @@ C/C++ generated files are embedded in distributed stable packages. The goal is
 to make it possible to install scikit-learn stable version
 on any machine with Python, Numpy, Scipy and C/C++ compiler.
 
-Fast matrix multiplications
-===========================
-
-Matrix multiplications (matrix-matrix and matrix-vector) are usually handled
-using the NumPy function ``np.dot``, but in versions of NumPy before 1.7.2
-this function is suboptimal when the inputs are not both in the C (row-major)
-layout; in that case, the inputs may be implicitly copied to obtain the right
-layout. This obviously consumes memory and takes time.
-
-The function ``fast_dot`` in ``sklearn.utils.extmath`` offers a fast
-replacement for ``np.dot`` that prevents copies from being made in some cases.
-In all other cases, it dispatches to ``np.dot`` and when the NumPy version is
-new enough, it is in fact an alias for that function, making it a drop-in
-replacement. Example usage of ``fast_dot``::
-
-  >>> import numpy as np
-  >>> from sklearn.utils.extmath import fast_dot
-  >>> X = np.random.random_sample([2, 10])
-  >>> np.allclose(np.dot(X, X.T), fast_dot(X, X.T))
-  True
-
-This function operates optimally on 2-dimensional arrays, both of the same
-dtype, which should be either single or double precision float. If these
-requirements aren't met or the BLAS package is not available, the call is
-silently dispatched to ``numpy.dot``. If you want to be sure when the original
-``numpy.dot`` has been invoked in a situation where it is suboptimal, you can
-activate the related warning::
-
-  >>> import warnings
-  >>> from sklearn.exceptions import NonBLASDotWarning
-  >>> warnings.simplefilter('always', NonBLASDotWarning) # doctest: +SKIP
-
 .. _profiling-python-code:
 
 Profiling Python code
@@ -425,4 +393,3 @@ A sample algorithmic trick: warm restarts for cross validation
 
 TODO: demonstrate the warm restart tricks for cross validation of linear
 regression with Coordinate Descent.
-
@@ -25,7 +25,8 @@ How can I contribute to scikit-learn?
 -----------------------------------------
 See :ref:`contributing`. Before wanting to add a new algorithm, which is
 usually a major and lengthy undertaking, it is recommended to start with :ref:`known
-issues <easy_issues>`.
+issues <easy_issues>`. Please do not contact the contributors of scikit-learn directly
+regarding contributing to scikit-learn.
 
 What's the best way to get help on scikit-learn usage?
 --------------------------------------------------------------
 
@@ -40,6 +40,9 @@ Functions
    :template: function.rst
 
    base.clone
+   config_context
+   set_config
+   get_config
 
 
 .. _cluster_ref:
 
@@ -68,6 +68,25 @@ To benchmark different estimators for your case you can simply change the
 :ref:`sphx_glr_auto_examples_applications_plot_prediction_latency.py`. This should give
 you an estimate of the order of magnitude of the prediction latency.
 
+.. topic:: Configuring Scikit-learn for reduced validation overhead
+
+    Scikit-learn does some validation on data that increases the overhead per
+    call to ``predict`` and similar functions. In particular, checking that
+    features are finite (not NaN or infinite) involves a full pass over the
+    data. If you ensure that your data is acceptable, you may suppress
+    checking for finiteness by setting the environment variable
+    ``SKLEARN_ASSUME_FINITE`` to a non-empty string before importing
+    scikit-learn, or configure it in Python with :func:`sklearn.set_config`.
+    For more control than these global settings, a :func:`config_context`
+    allows you to set this configuration within a specified context::
+
+      >>> import sklearn
+      >>> with sklearn.config_context(assume_finite=True):
+      ...    pass  # do learning/prediction here with reduced validation
+
+    Note that this will affect all uses of
+    :func:`sklearn.utils.assert_all_finite` within the context.
+
 Influence of the Number of Features
 -----------------------------------
 
 
@@ -464,7 +464,7 @@ In this case we would like to know if a model trained on a particular set of
 groups generalizes well to the unseen groups. To measure this, we need to
 ensure that all the samples in the validation fold come from groups that are
 not represented at all in the paired training fold.
- 
+
 The following cross-validation splitters can be used to do that.
 The grouping identifier for the samples is specified via the ``groups``
 parameter.
@@ -601,29 +601,29 @@ samples that are part of the validation set, and to -1 for all other samples.
 Cross validation of time series data
 ====================================
 
-Time series data is characterised by the correlation between observations 
-that are near in time (*autocorrelation*). However, classical 
-cross-validation techniques such as :class:`KFold` and 
-:class:`ShuffleSplit` assume the samples are independent and 
-identically distributed, and would result in unreasonable correlation 
-between training and testing instances (yielding poor estimates of 
-generalisation error) on time series data. Therefore, it is very important 
-to evaluate our model for time series data on the "future" observations 
-least like those that are used to train the model. To achieve this, one 
+Time series data is characterised by the correlation between observations
+that are near in time (*autocorrelation*). However, classical
+cross-validation techniques such as :class:`KFold` and
+:class:`ShuffleSplit` assume the samples are independent and
+identically distributed, and would result in unreasonable correlation
+between training and testing instances (yielding poor estimates of
+generalisation error) on time series data. Therefore, it is very important
+to evaluate our model for time series data on the "future" observations
+least like those that are used to train the model. To achieve this, one
 solution is provided by :class:`TimeSeriesSplit`.
 
 
 Time Series Split
 -----------------
 
-:class:`TimeSeriesSplit` is a variation of *k-fold* which 
-returns first :math:`k` folds as train set and the :math:`(k+1)` th 
-fold as test set. Note that unlike standard cross-validation methods, 
+:class:`TimeSeriesSplit` is a variation of *k-fold* which
+returns the first :math:`k` folds as the train set and the :math:`(k+1)` th
+fold as the test set. Note that unlike standard cross-validation methods,
 successive training sets are supersets of those that come before them.
 Also, it adds all surplus data to the first training partition, which
 is always used to train the model.
 
-This class can be used to cross-validate time series data samples 
+This class can be used to cross-validate time series data samples
 that are observed at fixed time intervals.
 
 Example of 3-split time series cross-validation on a dataset with 6 samples::
@@ -634,7 +634,7 @@ Example of 3-split time series cross-validation on a dataset with 6 samples::
   >>> y = np.array([1, 2, 3, 4, 5, 6])
   >>> tscv = TimeSeriesSplit(n_splits=3)
   >>> print(tscv)  # doctest: +NORMALIZE_WHITESPACE
-  TimeSeriesSplit(n_splits=3)
+  TimeSeriesSplit(max_train_size=None, n_splits=3)
   >>> for train, test in tscv.split(X):
   ...     print("%s %s" % (train, test))
   [0 1 2] [3]
 
@@ -14,7 +14,7 @@ estimator classes. Typical examples include ``C``, ``kernel`` and ``gamma``
 for Support Vector Classifier, ``alpha`` for Lasso, etc.
 
 It is possible and recommended to search the hyper-parameter space for the
-best :ref:`cross_validation` score.
+best :ref:`cross validation <cross_validation>` score.
 
 Any parameter provided when constructing an estimator may be optimized in this
 manner. Specifically, to find the names and current values for all parameters
 
@@ -173,7 +173,7 @@ Here is an example of building custom scorers, and of using the
     >>> #  and predictions defined below.
     >>> loss  = make_scorer(my_custom_loss_func, greater_is_better=False)
     >>> score = make_scorer(my_custom_loss_func, greater_is_better=True)
-    >>> ground_truth = [[1, 1]]
+    >>> ground_truth = [[1], [1]]
     >>> predictions  = [0, 1]
     >>> from sklearn.dummy import DummyClassifier
     >>> clf = DummyClassifier(strategy='most_frequent', random_state=0)