@@ -509,7 +509,7 @@ def test_symmetry():

 @pytest.mark.parametrize(
     'name',
-    set(ALL_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS)
+    sorted(set(ALL_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS))
 def test_sample_order_invariance(name):
     random_state = check_random_state(0)
     y_true = random_state.randint(0, 2, size=(20, ))
@@ -561,7 +561,7 @@ def test_sample_order_invariance_multilabel_and_multioutput():

 @pytest.mark.parametrize(
     'name',
-    set(ALL_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS)
+    sorted(set(ALL_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS))
 def test_format_invariance_with_1d_vectors(name):
     random_state = check_random_state(0)
     y1 = random_state.randint(0, 2, size=(20, ))
@@ -636,8 +636,8 @@ def test_format_invariance_with_1d_vectors(name):


 @pytest.mark.parametrize(
-        'name',
-        set(CLASSIFICATION_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS)
+    'name',
+    sorted(set(CLASSIFICATION_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS))
 def test_classification_invariance_string_vs_numbers_labels(name):
     # Ensure that classification metrics with string labels are invariant
     random_state = check_random_state(0)
@@ -767,22 +767,23 @@ def check_single_sample_multioutput(name):


 @pytest.mark.parametrize(
-        'name',
-        (set(ALL_METRICS)
-         # Those metrics are not always defined with one sample
-         # or in multiclass classification
-         - METRIC_UNDEFINED_BINARY_MULTICLASS
-         - set(THRESHOLDED_METRICS)))
+    'name',
+    sorted(
+        set(ALL_METRICS)
+        # Those metrics are not always defined with one sample
+        # or in multiclass classification
+        - METRIC_UNDEFINED_BINARY_MULTICLASS - set(THRESHOLDED_METRICS)))
 def test_single_sample(name):
     check_single_sample(name)


-@pytest.mark.parametrize('name', MULTIOUTPUT_METRICS | MULTILABELS_METRICS)
+@pytest.mark.parametrize('name',
+                         sorted(MULTIOUTPUT_METRICS | MULTILABELS_METRICS))
 def test_single_sample_multioutput(name):
     check_single_sample_multioutput(name)


-@pytest.mark.parametrize('name', MULTIOUTPUT_METRICS)
+@pytest.mark.parametrize('name', sorted(MULTIOUTPUT_METRICS))
 def test_multioutput_number_of_output_differ(name):
     y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
     y_pred = np.array([[0, 0], [1, 0], [0, 0]])
@@ -791,7 +792,7 @@ def test_multioutput_number_of_output_differ(name):
     assert_raises(ValueError, metric, y_true, y_pred)


-@pytest.mark.parametrize('name', MULTIOUTPUT_METRICS)
+@pytest.mark.parametrize('name', sorted(MULTIOUTPUT_METRICS))
 def test_multioutput_regression_invariance_to_dimension_shuffling(name):
     # test invariance to dimension shuffling
     random_state = check_random_state(0)
@@ -846,7 +847,7 @@ def test_multilabel_representation_invariance():
                             "dense and sparse indicator formats." % name)


-@pytest.mark.parametrize('name', MULTILABELS_METRICS)
+@pytest.mark.parametrize('name', sorted(MULTILABELS_METRICS))
 def test_raise_value_error_multilabel_sequences(name):
     # make sure the multilabel-sequence format raises ValueError
     multilabel_sequences = [
@@ -862,7 +863,7 @@ def test_raise_value_error_multilabel_sequences(name):
         assert_raises(ValueError, metric, seq, seq)


-@pytest.mark.parametrize('name', METRICS_WITH_NORMALIZE_OPTION)
+@pytest.mark.parametrize('name', sorted(METRICS_WITH_NORMALIZE_OPTION))
 def test_normalize_option_binary_classification(name):
     # Test in the binary case
     n_samples = 20
@@ -879,7 +880,7 @@ def test_normalize_option_binary_classification(name):
                         measure)


-@pytest.mark.parametrize('name', METRICS_WITH_NORMALIZE_OPTION)
+@pytest.mark.parametrize('name', sorted(METRICS_WITH_NORMALIZE_OPTION))
 def test_normalize_option_multiclass_classification(name):
     # Test in the multiclass case
     random_state = check_random_state(0)
@@ -986,7 +987,7 @@ def check_averaging(name, y_true, y_true_binarize, y_pred, y_pred_binarize,
         raise ValueError("Metric is not recorded as having an average option")


-@pytest.mark.parametrize('name', METRICS_WITH_AVERAGING)
+@pytest.mark.parametrize('name', sorted(METRICS_WITH_AVERAGING))
 def test_averaging_multiclass(name):
     n_samples, n_classes = 50, 3
     random_state = check_random_state(0)
@@ -1003,7 +1004,8 @@ def test_averaging_multiclass(name):


 @pytest.mark.parametrize(
-    'name', METRICS_WITH_AVERAGING | THRESHOLDED_METRICS_WITH_AVERAGING)
+    'name',
+    sorted(METRICS_WITH_AVERAGING | THRESHOLDED_METRICS_WITH_AVERAGING))
 def test_averaging_multilabel(name):
     n_samples, n_classes = 40, 5
     _, y = make_multilabel_classification(n_features=1, n_classes=n_classes,
@@ -1019,7 +1021,7 @@ def test_averaging_multilabel(name):
                     y_pred, y_pred_binarize, y_score)


-@pytest.mark.parametrize('name', METRICS_WITH_AVERAGING)
+@pytest.mark.parametrize('name', sorted(METRICS_WITH_AVERAGING))
 def test_averaging_multilabel_all_zeroes(name):
     y_true = np.zeros((20, 3))
     y_pred = np.zeros((20, 3))
@@ -1044,7 +1046,7 @@ def test_averaging_binary_multilabel_all_zeroes():
                      y_pred_binarize, is_multilabel=True)


-@pytest.mark.parametrize('name', METRICS_WITH_AVERAGING)
+@pytest.mark.parametrize('name', sorted(METRICS_WITH_AVERAGING))
 def test_averaging_multilabel_all_ones(name):
     y_true = np.ones((20, 3))
     y_pred = np.ones((20, 3))
@@ -1136,9 +1138,10 @@ def check_sample_weight_invariance(name, metric, y1, y2):


 @pytest.mark.parametrize(
-        'name',
-        (set(ALL_METRICS).intersection(set(REGRESSION_METRICS))
-         - METRICS_WITHOUT_SAMPLE_WEIGHT))
+    'name',
+    sorted(
+        set(ALL_METRICS).intersection(set(REGRESSION_METRICS)) -
+        METRICS_WITHOUT_SAMPLE_WEIGHT))
 def test_regression_sample_weight_invariance(name):
     n_samples = 50
     random_state = check_random_state(0)
@@ -1150,9 +1153,10 @@ def test_regression_sample_weight_invariance(name):


 @pytest.mark.parametrize(
-        'name',
-        (set(ALL_METRICS) - set(REGRESSION_METRICS)
-         - METRICS_WITHOUT_SAMPLE_WEIGHT - METRIC_UNDEFINED_BINARY))
+    'name',
+    sorted(
+        set(ALL_METRICS) - set(REGRESSION_METRICS) -
+        METRICS_WITHOUT_SAMPLE_WEIGHT - METRIC_UNDEFINED_BINARY))
 def test_binary_sample_weight_invariance(name):
     # binary
     n_samples = 50
@@ -1168,10 +1172,10 @@ def test_binary_sample_weight_invariance(name):


 @pytest.mark.parametrize(
-        'name',
-        (set(ALL_METRICS) - set(REGRESSION_METRICS)
-         - METRICS_WITHOUT_SAMPLE_WEIGHT
-         - METRIC_UNDEFINED_BINARY_MULTICLASS))
+    'name',
+    sorted(
+        set(ALL_METRICS) - set(REGRESSION_METRICS) -
+        METRICS_WITHOUT_SAMPLE_WEIGHT - METRIC_UNDEFINED_BINARY_MULTICLASS))
 def test_multiclass_sample_weight_invariance(name):
     # multiclass
     n_samples = 50
@@ -1187,9 +1191,9 @@ def test_multiclass_sample_weight_invariance(name):


 @pytest.mark.parametrize(
-        'name',
-        (MULTILABELS_METRICS | THRESHOLDED_MULTILABEL_METRICS |
-         MULTIOUTPUT_METRICS) - METRICS_WITHOUT_SAMPLE_WEIGHT)
+    'name',
+    sorted((MULTILABELS_METRICS | THRESHOLDED_MULTILABEL_METRICS
+            | MULTIOUTPUT_METRICS) - METRICS_WITHOUT_SAMPLE_WEIGHT))
 def test_multilabel_sample_weight_invariance(name):
     # multilabel indicator
     random_state = check_random_state(0)
@@ -1235,7 +1239,8 @@ def test_no_averaging_labels():


 @pytest.mark.parametrize(
-    'name', MULTILABELS_METRICS - {"unnormalized_multilabel_confusion_matrix"})
+    'name',
+    sorted(MULTILABELS_METRICS - {"unnormalized_multilabel_confusion_matrix"}))
 def test_multilabel_label_permutations_invariance(name):
     random_state = check_random_state(0)
     n_samples, n_classes = 20, 4
@@ -1255,7 +1260,7 @@ def test_multilabel_label_permutations_invariance(name):


 @pytest.mark.parametrize(
-    'name', THRESHOLDED_MULTILABEL_METRICS | MULTIOUTPUT_METRICS)
+    'name', sorted(THRESHOLDED_MULTILABEL_METRICS | MULTIOUTPUT_METRICS))
 def test_thresholded_multilabel_multioutput_permutations_invariance(name):
     random_state = check_random_state(0)
     n_samples, n_classes = 20, 4
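Every hunk applies the same pattern: each set-valued argument to @pytest.mark.parametrize is wrapped in sorted(). Presumably the goal is deterministic test collection: a Python set iterates in an order that can change between interpreter runs (string hashing is randomized), so parametrizing over a raw set yields an unstable parameter order and unstable test IDs, which breaks tools that need every process to collect the same tests in the same order, such as pytest-xdist. A minimal sketch of the idea, using hypothetical metric names rather than anything from this diff:

import pytest

# Hypothetical stand-in for a registry such as ALL_METRICS; only the names
# matter for this illustration.
EXAMPLE_METRICS = {"accuracy_score", "hamming_loss", "log_loss"}
EXAMPLE_UNDEFINED = {"log_loss"}


# sorted() turns the set difference into a list with a stable order, so the
# generated parametrize IDs are identical on every run and on every worker.
@pytest.mark.parametrize('name', sorted(EXAMPLE_METRICS - EXAMPLE_UNDEFINED))
def test_name_is_lowercase(name):
    assert name == name.lower()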