scikit-learn · lesteve · Mar 27, 2018 · Mar 10, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -41,7 +41,7 @@ matrix:
     # This environment tests the newest supported Anaconda release (5.0.0)
     # It also runs tests requiring Pandas and PyAMG
     - env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true"
-           NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" PANDAS_VERSION="0.20.3"
+           NUMPY_VERSION="1.14.2" SCIPY_VERSION="1.0.0" PANDAS_VERSION="0.20.3"
            CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" PILLOW_VERSION="4.3.0"
            COVERAGE=true
            CHECK_PYTEST_SOFT_DEPENDENCY="true" TEST_DOCSTRINGS="true"

diff --git a/conftest.py b/conftest.py
@@ -1,14 +1,31 @@
-# This file is here so that when running from the root folder
-# ./sklearn is added to sys.path by pytest.
-# See https://docs.pytest.org/en/latest/pythonpath.html for more details.
-# For example, this allows to build extensions in place and run pytest
-# doc/modules/clustering.rst and use sklearn from the local folder
-# rather than the one from site-packages.
-
-# Set numpy array str/repr to legacy behaviour on numpy > 1.13 to make
-# the doctests pass
-import numpy as np
-try:
-    np.set_printoptions(legacy='1.13')
-except TypeError:
-    pass
+# Even if empty this file is useful so that when running from the root folder
+# ./sklearn is added to sys.path by pytest. See
+# https://docs.pytest.org/en/latest/pythonpath.html for more details. For
+# example, this allows building extensions in place and running pytest
+# doc/modules/clustering.rst while using sklearn from the local folder rather
+# than the one from site-packages.
+
+from distutils.version import LooseVersion
+
+import pytest
+from _pytest.doctest import DoctestItem
+
+
+def pytest_collection_modifyitems(config, items):
+    # numpy changed the str/repr formatting of arrays in 1.14 and the doctests
+    # expect the new format: skip them on older numpy or if numpy is missing.
+    skip_doctests = True
+    try:
+        import numpy as np
+        if LooseVersion(np.__version__) >= LooseVersion('1.14'):
+            skip_doctests = False
+    except ImportError:
+        pass
+
+    if skip_doctests:
+        skip_marker = pytest.mark.skip(
+            reason='doctests are only run for numpy >= 1.14')
+
+        for item in items:
+            if isinstance(item, DoctestItem):
+                item.add_marker(skip_marker)
diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst
@@ -34,7 +34,7 @@ of size 28x28 pixels, labeled from 0 to 9::
   >>> mnist.target.shape
   (70000,)
   >>> np.unique(mnist.target)
-  array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])
+  array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
 
 After the first download, the dataset is cached locally in the path
 specified by the ``data_home`` keyword argument, which defaults to

diff --git a/doc/developers/utilities.rst b/doc/developers/utilities.rst
@@ -71,7 +71,7 @@ For example::
     >>> random_state = 0
     >>> random_state = check_random_state(random_state)
     >>> random_state.rand(4)
-    array([ 0.5488135 ,  0.71518937,  0.60276338,  0.54488318])
+    array([0.5488135 , 0.71518937, 0.60276338, 0.54488318])
 
 When developing your own scikit-learn compatible estimator, the following
 helpers are available.

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
@@ -190,7 +190,7 @@ object::
               steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))])
      >>> # The pca instance can be inspected directly
      >>> print(pca1.components_) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-         [[ -1.77484909e-19  ... 4.07058917e-18]]
+         [[-1.77484909e-19  ... 4.07058917e-18]]
 
    Enabling caching triggers a clone of the transformers before fitting.
    Therefore, the transformer instance given to the pipeline cannot be
@@ -212,7 +212,7 @@ object::
                steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))])
      >>> print(cached_pipe.named_steps['reduce_dim'].components_)
      ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-         [[ -1.77484909e-19  ... 4.07058917e-18]]
+         [[-1.77484909e-19  ... 4.07058917e-18]]
      >>> # Remove the cache directory
      >>> rmtree(cachedir)
 

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
@@ -106,7 +106,7 @@ time)::
   >>> clf = svm.SVC(kernel='linear', C=1)
   >>> scores = cross_val_score(clf, iris.data, iris.target, cv=5)
   >>> scores                                              # doctest: +ELLIPSIS
-  array([ 0.96...,  1.  ...,  0.96...,  0.96...,  1.        ])
+  array([0.96..., 1.  ..., 0.96..., 0.96..., 1.        ])
 
 The mean score and the 95\% confidence interval of the score estimate are hence
 given by::
@@ -122,7 +122,7 @@ scoring parameter::
   >>> scores = cross_val_score(
   ...     clf, iris.data, iris.target, cv=5, scoring='f1_macro')
   >>> scores                                              # doctest: +ELLIPSIS
-  array([ 0.96...,  1.  ...,  0.96...,  0.96...,  1.        ])
+  array([0.96..., 1.  ..., 0.96..., 0.96..., 1.        ])
 
 See :ref:`scoring_parameter` for details.
 In the case of the Iris dataset, the samples are balanced across target
@@ -141,7 +141,7 @@ validation iterator instead, for instance::
   >>> cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
   >>> cross_val_score(clf, iris.data, iris.target, cv=cv)
   ...                                                     # doctest: +ELLIPSIS
-  array([ 0.97...,  0.97...,  1.        ])
+  array([0.97..., 0.97..., 1.        ])
 
 
 .. topic:: Data transformation with held out data
@@ -168,7 +168,7 @@ validation iterator instead, for instance::
       >>> clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1))
       >>> cross_val_score(clf, iris.data, iris.target, cv=cv)
       ...                                                 # doctest: +ELLIPSIS
-      array([ 0.97...,  0.93...,  0.95...])
+      array([0.97..., 0.93..., 0.95...])
 
     See :ref:`combining_estimators`.
 
@@ -212,7 +212,7 @@ predefined scorer names::
     >>> sorted(scores.keys())
     ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']
     >>> scores['test_recall_macro']                       # doctest: +ELLIPSIS
-    array([ 0.96...,  1.  ...,  0.96...,  0.96...,  1.        ])
+    array([0.96..., 1.  ..., 0.96..., 0.96..., 1.        ])
 
 Or as a dict mapping scorer name to a predefined or custom scoring function::
 
@@ -225,7 +225,7 @@ Or as a dict mapping scorer name to a predefined or custom scoring function::
     ['fit_time', 'score_time', 'test_prec_macro', 'test_rec_micro',
      'train_prec_macro', 'train_rec_micro']
     >>> scores['train_rec_micro']                         # doctest: +ELLIPSIS
-    array([ 0.97...,  0.97...,  0.99...,  0.98...,  0.98...])
+    array([0.97..., 0.97..., 0.99..., 0.98..., 0.98...])
 
 Here is an example of ``cross_validate`` using a single metric::
 

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
@@ -782,7 +782,7 @@ accessed via the ``feature_importances_`` property::
     >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
     ...     max_depth=1, random_state=0).fit(X, y)
     >>> clf.feature_importances_  # doctest: +ELLIPSIS
-    array([ 0.11,  0.1 ,  0.11,  ...
+    array([0.11, 0.1 , 0.11, ...
 
 .. topic:: Examples:
 

diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst
@@ -49,9 +49,9 @@ is a traditional numerical feature::
   >>> vec = DictVectorizer()
 
   >>> vec.fit_transform(measurements).toarray()
-  array([[  1.,   0.,   0.,  33.],
-         [  0.,   1.,   0.,  12.],
-         [  0.,   0.,   1.,  18.]])
+  array([[ 1.,  0.,  0., 33.],
+         [ 0.,  1.,  0., 12.],
+         [ 0.,  0.,  1., 18.]])
 
   >>> vec.get_feature_names()
   ['city=Dubai', 'city=London', 'city=San Francisco', 'temperature']
@@ -89,7 +89,7 @@ suitable for feeding into a classifier (maybe after being piped into a
   <1x6 sparse matrix of type '<... 'numpy.float64'>'
       with 6 stored elements in Compressed Sparse ... format>
   >>> pos_vectorized.toarray()
-  array([[ 1.,  1.,  1.,  1.,  1.,  1.]])
+  array([[1., 1., 1., 1., 1., 1.]])
   >>> vec.get_feature_names()
   ['pos+1=PP', 'pos-1=NN', 'pos-2=DT', 'word+1=on', 'word-1=cat', 'word-2=the']
 
@@ -463,12 +463,12 @@ content of the documents::
       with 9 stored elements in Compressed Sparse ... format>
 
   >>> tfidf.toarray()                        # doctest: +ELLIPSIS
-  array([[ 0.81940995,  0.        ,  0.57320793],
-         [ 1.        ,  0.        ,  0.        ],
-         [ 1.        ,  0.        ,  0.        ],
-         [ 1.        ,  0.        ,  0.        ],
-         [ 0.47330339,  0.88089948,  0.        ],
-         [ 0.58149261,  0.        ,  0.81355169]])
+  array([[0.81940995, 0.        , 0.57320793],
+         [1.        , 0.        , 0.        ],
+         [1.        , 0.        , 0.        ],
+         [1.        , 0.        , 0.        ],
+         [0.47330339, 0.88089948, 0.        ],
+         [0.58149261, 0.        , 0.81355169]])
 
 Each row is normalized to have unit Euclidean norm:
 
@@ -523,19 +523,19 @@ And the L2-normalized tf-idf changes to
 
   >>> transformer = TfidfTransformer()
   >>> transformer.fit_transform(counts).toarray()
-  array([[ 0.85151335,  0.        ,  0.52433293],
-         [ 1.        ,  0.        ,  0.        ],
-         [ 1.        ,  0.        ,  0.        ],
-         [ 1.        ,  0.        ,  0.        ],
-         [ 0.55422893,  0.83236428,  0.        ],
-         [ 0.63035731,  0.        ,  0.77630514]])
+  array([[0.85151335, 0.        , 0.52433293],
+         [1.        , 0.        , 0.        ],
+         [1.        , 0.        , 0.        ],
+         [1.        , 0.        , 0.        ],
+         [0.55422893, 0.83236428, 0.        ],
+         [0.63035731, 0.        , 0.77630514]])
 
 The weights of each
 feature computed by the ``fit`` method call are stored in a model
 attribute::
 
   >>> transformer.idf_                       # doctest: +ELLIPSIS
-  array([ 1. ...,  2.25...,  1.84...])
+  array([1. ..., 2.25..., 1.84...])
 
 
 

diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst
@@ -413,9 +413,9 @@ kernel but with the hyperparameters set to ``theta``. An illustrative example:
     >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF
     >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0))
     >>> for hyperparameter in kernel.hyperparameters: print(hyperparameter)
-    Hyperparameter(name='k1__k1__constant_value', value_type='numeric', bounds=array([[  0.,  10.]]), n_elements=1, fixed=False)
-    Hyperparameter(name='k1__k2__length_scale', value_type='numeric', bounds=array([[  0.,  10.]]), n_elements=1, fixed=False)
-    Hyperparameter(name='k2__length_scale', value_type='numeric', bounds=array([[  0.,  10.]]), n_elements=1, fixed=False)
+    Hyperparameter(name='k1__k1__constant_value', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)
+    Hyperparameter(name='k1__k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)
+    Hyperparameter(name='k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)
     >>> params = kernel.get_params()
     >>> for key in sorted(params): print("%s : %s" % (key, params[key]))
     k1 : 1**2 * RBF(length_scale=0.5)
@@ -431,9 +431,9 @@ kernel but with the hyperparameters set to ``theta``. An illustrative example:
     >>> print(kernel.theta)  # Note: log-transformed
     [ 0.         -0.69314718  0.69314718]
     >>> print(kernel.bounds)  # Note: log-transformed
-    [[       -inf  2.30258509]
-     [       -inf  2.30258509]
-     [       -inf  2.30258509]]
+    [[      -inf 2.30258509]
+     [      -inf 2.30258509]
+     [      -inf 2.30258509]]
 
 
 All Gaussian process kernels are interoperable with :mod:`sklearn.metrics.pairwise`

diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst
@@ -31,9 +31,9 @@ that contain the missing values::
     SimpleImputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)
     >>> X = [[np.nan, 2], [6, np.nan], [7, 6]]
     >>> print(imp.transform(X))           # doctest: +NORMALIZE_WHITESPACE  +ELLIPSIS
-    [[ 4.          2.        ]
-     [ 6.          3.666...]
-     [ 7.          6.        ]]
+    [[4.          2.        ]
+     [6.          3.666...]
+     [7.          6.        ]]
 
 The :class:`SimpleImputer` class also supports sparse matrices::
 
@@ -44,13 +44,13 @@ The :class:`SimpleImputer` class also supports sparse matrices::
     SimpleImputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0)
     >>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]])
     >>> print(imp.transform(X_test))      # doctest: +NORMALIZE_WHITESPACE  +ELLIPSIS
-    [[ 4.          2.        ]
-     [ 6.          3.666...]
-     [ 7.          6.        ]]
+    [[4.          2.        ]
+     [6.          3.666...]
+     [7.          6.        ]]
 
 Note that, here, missing values are encoded by 0 and are thus implicitly stored
 in the matrix. This format is thus suitable when there are many more missing
 values than observed values.
 
 :class:`SimpleImputer` can be used in a Pipeline as a way to build a composite
-estimator that supports imputation. See :ref:`sphx_glr_auto_examples_plot_missing_values.py`.
+estimator that supports imputation. See :ref:`sphx_glr_auto_examples_plot_missing_values.py`.
diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst
@@ -83,13 +83,13 @@ The function :func:`validation_curve` can help in this case::
   >>> train_scores, valid_scores = validation_curve(Ridge(), X, y, "alpha",
   ...                                               np.logspace(-7, 3, 3))
   >>> train_scores           # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-  array([[ 0.94...,  0.92...,  0.92...],
-         [ 0.94...,  0.92...,  0.92...],
-         [ 0.47...,  0.45...,  0.42...]])
+  array([[0.94..., 0.92..., 0.92...],
+         [0.94..., 0.92..., 0.92...],
+         [0.47..., 0.45..., 0.42...]])
   >>> valid_scores           # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-  array([[ 0.90...,  0.92...,  0.94...],
-         [ 0.90...,  0.92...,  0.94...],
-         [ 0.44...,  0.39...,  0.45...]])
+  array([[0.90..., 0.92..., 0.94...],
+         [0.90..., 0.92..., 0.94...],
+         [0.44..., 0.39..., 0.45...]])
 
 If the training score and the validation score are both low, the estimator will
 be underfitting. If the training score is high and the validation score is low,
@@ -148,11 +148,11 @@ average scores on the validation sets)::
   >>> train_sizes            # doctest: +NORMALIZE_WHITESPACE
   array([ 50, 80, 110])
   >>> train_scores           # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-  array([[ 0.98...,  0.98 ,  0.98...,  0.98...,  0.98...],
-         [ 0.98...,  1.   ,  0.98...,  0.98...,  0.98...],
-         [ 0.98...,  1.   ,  0.98...,  0.98...,  0.99...]])
+  array([[0.98..., 0.98 , 0.98..., 0.98..., 0.98...],
+         [0.98..., 1.   , 0.98..., 0.98..., 0.98...],
+         [0.98..., 1.   , 0.98..., 0.98..., 0.99...]])
   >>> valid_scores           # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-  array([[ 1. ,  0.93...,  1. ,  1. ,  0.96...],
-         [ 1. ,  0.96...,  1. ,  1. ,  0.96...],
-         [ 1. ,  0.96...,  1. ,  1. ,  0.96...]])
+  array([[1. , 0.93..., 1. , 1. , 0.96...],
+         [1. , 0.96..., 1. , 1. , 0.96...],
+         [1. , 0.96..., 1. , 1. , 0.96...]])
 
diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
@@ -47,7 +47,7 @@ and will store the coefficients :math:`w` of the linear model in its
     >>> reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
     LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
     >>> reg.coef_
-    array([ 0.5,  0.5])
+    array([0.5, 0.5])
 
 However, coefficient estimates for Ordinary Least Squares rely on the
 independence of the model terms. When terms are correlated and the
@@ -106,7 +106,7 @@ its ``coef_`` member::
     Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
           normalize=False, random_state=None, solver='auto', tol=0.001)
     >>> reg.coef_
-    array([ 0.34545455,  0.34545455])
+    array([0.34545455, 0.34545455])
     >>> reg.intercept_ #doctest: +ELLIPSIS
     0.13636...
 
@@ -188,7 +188,7 @@ for another implementation::
        normalize=False, positive=False, precompute=False, random_state=None,
        selection='cyclic', tol=0.0001, warm_start=False)
     >>> reg.predict([[1, 1]])
-    array([ 0.8])
+    array([0.8])
 
 Also useful for lower-level tasks is the function :func:`lasso_path` that
 computes the coefficients along the full path of possible values.
@@ -453,7 +453,7 @@ function of the norm of its coefficients.
         fit_path=True, max_iter=500, normalize=True, positive=False,
         precompute='auto', verbose=False)
    >>> reg.coef_    # doctest: +ELLIPSIS
-   array([ 0.717157...,  0.        ])
+   array([0.717157..., 0.        ])
 
 .. topic:: Examples:
 
@@ -619,13 +619,13 @@ Bayesian Ridge Regression is used for regression::
 After being fitted, the model can then be used to predict new values::
 
     >>> reg.predict ([[1, 0.]])
-    array([ 0.50000013])
+    array([0.50000013])
 
 
 The weights :math:`w` of the model can be access::
 
     >>> reg.coef_
-    array([ 0.49999993,  0.49999993])
+    array([0.49999993, 0.49999993])
 
 Due to the Bayesian framework, the weights found are slightly different to the
 ones found by :ref:`ordinary_least_squares`. However, Bayesian Ridge Regression
@@ -1211,9 +1211,9 @@ of a given degree.  It can be used as follows::
            [4, 5]])
     >>> poly = PolynomialFeatures(degree=2)
     >>> poly.fit_transform(X)
-    array([[  1.,   0.,   1.,   0.,   0.,   1.],
-           [  1.,   2.,   3.,   4.,   6.,   9.],
-           [  1.,   4.,   5.,  16.,  20.,  25.]])
+    array([[ 1.,  0.,  1.,  0.,  0.,  1.],
+           [ 1.,  2.,  3.,  4.,  6.,  9.],
+           [ 1.,  4.,  5., 16., 20., 25.]])
 
 The features of ``X`` have been transformed from :math:`[x_1, x_2]` to
 :math:`[1, x_1, x_2, x_1^2, x_1 x_2, x_2^2]`, and can now be used within

diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst
@@ -166,10 +166,10 @@ It can be computed using :func:`chi2_kernel` and then passed to an
     >>> y = [0, 1, 0, 1]
     >>> K = chi2_kernel(X, gamma=.5)
     >>> K                        # doctest: +ELLIPSIS
-    array([[ 1.        ,  0.36...,  0.89...,  0.58...],
-           [ 0.36...,  1.        ,  0.51...,  0.83...],
-           [ 0.89...,  0.51...,  1.        ,  0.77... ],
-           [ 0.58...,  0.83...,  0.77... ,  1.        ]])
+    array([[1.        , 0.36787944, 0.89483932, 0.58364548],
+           [0.36787944, 1.        , 0.51341712, 0.83822343],
+           [0.89483932, 0.51341712, 1.        , 0.7768366 ],
+           [0.58364548, 0.83822343, 0.7768366 , 1.        ]])
 
     >>> svm = SVC(kernel='precomputed').fit(K, y)
     >>> svm.predict(K)