diff --git a/.travis.yml b/.travis.yml index 2a515584a498a..bb6344c871124 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,11 @@ env: - DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.1" CYTHON_VERSION="0.25.2" + # This environment uses pytest to run the tests. It uses the newest + # supported anaconda env. It also runs tests requiring Pandas. + - USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true" + NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.1" + CYTHON_VERSION="0.25.2" # flake8 linting on diff wrt common ancestor with upstream/master - RUN_FLAKE8="true" SKIP_TESTS="true" DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true" diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 4d876e90b7c25..fa6380e0451ad 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -51,13 +51,13 @@ if [[ "$DISTRIB" == "conda" ]]; then # Configure the conda environment and put it in the path using the # provided versions if [[ "$INSTALL_MKL" == "true" ]]; then - conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ + conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ mkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} else - conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ + conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ nomkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index 6ab342b932cf1..35a74a3087394 100755 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -21,6 +21,11 @@ except ImportError: python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" run_tests() { + if [[
"$USE_PYTEST" == "true" ]]; then + TEST_CMD="pytest --showlocals --pyargs" + else + TEST_CMD="nosetests --with-coverage" # --with-timer --timer-top-n 20" + fi # Get into a temp directory to run test from the installed scikit learn and # check if we do not leave artifacts mkdir -p $TEST_DIR @@ -34,10 +39,9 @@ run_tests() { export SKLEARN_SKIP_NETWORK_TESTS=1 if [[ "$COVERAGE" == "true" ]]; then - nosetests -s --with-coverage --with-timer --timer-top-n 20 sklearn - else - nosetests -s --with-timer --timer-top-n 20 sklearn + TEST_CMD="$TEST_CMD --with-coverage" fi + $TEST_CMD sklearn # Test doc cd $OLDPWD diff --git a/setup.cfg b/setup.cfg index 0df34d84602e3..378905311e17e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,6 +22,13 @@ doctest-fixtures = _fixture ignore-files=^setup\.py$ #doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE +[tool:pytest] +# disable-pytest-warnings should be removed once we drop nose and we +# rewrite tests using yield with parametrize +addopts = + --doctest-modules + --disable-pytest-warnings + [wheelhouse_uploader] artifact_indexes= # OSX wheels built by travis (only for specific tags): diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index 003c5727da097..c759f813104a9 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -194,7 +194,6 @@ def check_hyperparameters_equal(kernel1, kernel2): def test_kernel_clone(): # Test that sklearn's clone works correctly on kernels. - bounds = (1e-5, 1e5) for kernel in kernels: kernel_cloned = clone(kernel) @@ -209,12 +208,17 @@ def test_kernel_clone(): # Check that all hyperparameters are equal. yield check_hyperparameters_equal, kernel, kernel_cloned - # This test is to verify that using set_params does not - # break clone on kernels. 
- # This used to break because in kernels such as the RBF, non-trivial - # logic that modified the length scale used to be in the constructor - # See https://github.com/scikit-learn/scikit-learn/issues/6961 - # for more details. + +def test_kernel_clone_after_set_params(): + # This test is to verify that using set_params does not + # break clone on kernels. + # This used to break because in kernels such as the RBF, non-trivial + # logic that modified the length scale used to be in the constructor + # See https://github.com/scikit-learn/scikit-learn/issues/6961 + # for more details. + bounds = (1e-5, 1e5) + for kernel in kernels: + kernel_cloned = clone(kernel) params = kernel.get_params() # RationalQuadratic kernel is isotropic. isotropic_kernels = (ExpSineSquared, RationalQuadratic) diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index 0f5968cd63897..980cb8909ec94 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -156,6 +156,14 @@ def compute_kernel_slow(Y, X, kernel, h): raise ValueError('kernel not recognized') +def check_results(kernel, h, atol, rtol, breadth_first, bt, Y, dens_true): + dens = bt.kernel_density(Y, h, atol=atol, rtol=rtol, + kernel=kernel, + breadth_first=breadth_first) + assert_allclose(dens, dens_true, + atol=atol, rtol=max(rtol, 1e-7)) + + def test_ball_tree_kde(n_samples=100, n_features=3): np.random.seed(0) X = np.random.random((n_samples, n_features)) @@ -167,18 +175,11 @@ def test_ball_tree_kde(n_samples=100, n_features=3): for h in [0.01, 0.1, 1]: dens_true = compute_kernel_slow(Y, X, kernel, h) - def check_results(kernel, h, atol, rtol, breadth_first): - dens = bt.kernel_density(Y, h, atol=atol, rtol=rtol, - kernel=kernel, - breadth_first=breadth_first) - assert_allclose(dens, dens_true, - atol=atol, rtol=max(rtol, 1e-7)) - for rtol in [0, 1E-5]: for atol in [1E-6, 1E-2]: for breadth_first in (True, False): yield (check_results, 
kernel, h, atol, rtol, - breadth_first) + breadth_first, bt, Y, dens_true) def test_gaussian_kde(n_samples=1000): diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index 50ece8f97a271..c9fc06989ff10 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -24,28 +24,29 @@ def brute_force_neighbors(X, Y, k, metric, **kwargs): return dist, ind +def check_neighbors(dualtree, breadth_first, k, metric, X, Y, kwargs): + kdt = KDTree(X, leaf_size=1, metric=metric, **kwargs) + dist1, ind1 = kdt.query(Y, k, dualtree=dualtree, + breadth_first=breadth_first) + dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs) + + # don't check indices here: if there are any duplicate distances, + # the indices may not match. Distances should not have this problem. + assert_array_almost_equal(dist1, dist2) + + def test_kd_tree_query(): np.random.seed(0) X = np.random.random((40, DIMENSION)) Y = np.random.random((10, DIMENSION)) - def check_neighbors(dualtree, breadth_first, k, metric, kwargs): - kdt = KDTree(X, leaf_size=1, metric=metric, **kwargs) - dist1, ind1 = kdt.query(Y, k, dualtree=dualtree, - breadth_first=breadth_first) - dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs) - - # don't check indices here: if there are any duplicate distances, - # the indices may not match. Distances should not have this problem. 
- assert_array_almost_equal(dist1, dist2) - for (metric, kwargs) in METRICS.items(): for k in (1, 3, 5): for dualtree in (True, False): for breadth_first in (True, False): yield (check_neighbors, dualtree, breadth_first, - k, metric, kwargs) + k, metric, X, Y, kwargs) def test_kd_tree_query_radius(n_samples=100, n_features=10): @@ -107,6 +108,14 @@ def compute_kernel_slow(Y, X, kernel, h): raise ValueError('kernel not recognized') +def check_results(kernel, h, atol, rtol, breadth_first, Y, kdt, dens_true): + dens = kdt.kernel_density(Y, h, atol=atol, rtol=rtol, + kernel=kernel, + breadth_first=breadth_first) + assert_allclose(dens, dens_true, atol=atol, + rtol=max(rtol, 1e-7)) + + def test_kd_tree_kde(n_samples=100, n_features=3): np.random.seed(0) X = np.random.random((n_samples, n_features)) @@ -118,18 +127,11 @@ def test_kd_tree_kde(n_samples=100, n_features=3): for h in [0.01, 0.1, 1]: dens_true = compute_kernel_slow(Y, X, kernel, h) - def check_results(kernel, h, atol, rtol, breadth_first): - dens = kdt.kernel_density(Y, h, atol=atol, rtol=rtol, - kernel=kernel, - breadth_first=breadth_first) - assert_allclose(dens, dens_true, atol=atol, - rtol=max(rtol, 1e-7)) - for rtol in [0, 1E-5]: for atol in [1E-6, 1E-2]: for breadth_first in (True, False): yield (check_results, kernel, h, atol, rtol, - breadth_first) + breadth_first, Y, kdt, dens_true) def test_gaussian_kde(n_samples=1000): diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py index 3078a3c05df39..309eb2bc58796 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -29,6 +29,17 @@ def compute_kernel_slow(Y, X, kernel, h): raise ValueError('kernel not recognized') +def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true): + kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, + atol=atol, rtol=rtol) + log_dens = kde.fit(X).score_samples(Y) + assert_allclose(np.exp(log_dens), dens_true, + atol=atol, rtol=max(1E-7, rtol)) + 
assert_allclose(np.exp(kde.score(Y)), + np.prod(dens_true), + atol=atol, rtol=max(1E-7, rtol)) + + def test_kernel_density(n_samples=100, n_features=3): rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) @@ -39,20 +50,11 @@ def test_kernel_density(n_samples=100, n_features=3): for bandwidth in [0.01, 0.1, 1]: dens_true = compute_kernel_slow(Y, X, kernel, bandwidth) - def check_results(kernel, bandwidth, atol, rtol): - kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, - atol=atol, rtol=rtol) - log_dens = kde.fit(X).score_samples(Y) - assert_allclose(np.exp(log_dens), dens_true, - atol=atol, rtol=max(1E-7, rtol)) - assert_allclose(np.exp(kde.score(Y)), - np.prod(dens_true), - atol=atol, rtol=max(1E-7, rtol)) - for rtol in [0, 1E-5]: for atol in [1E-6, 1E-2]: for breadth_first in (True, False): - yield (check_results, kernel, bandwidth, atol, rtol) + yield (check_results, kernel, bandwidth, atol, rtol, + X, Y, dens_true) def test_kernel_density_sampling(n_samples=100, n_features=3):