diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index c823f0c3dcd10..e0e4091d4d2de 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -21,7 +21,7 @@ def test_affinity_propagation(): - """Affinity Propagation algorithm """ + # Affinity Propagation algorithm # Compute similarities S = -euclidean_distances(X, squared=True) preference = np.median(S) * 10 @@ -60,7 +60,7 @@ def test_affinity_propagation(): def test_affinity_propagation_predict(): - """Test AffinityPropagation.predict""" + # Test AffinityPropagation.predict af = AffinityPropagation(affinity="euclidean") labels = af.fit_predict(X) labels2 = af.predict(X) @@ -68,7 +68,7 @@ def test_affinity_propagation_predict(): def test_affinity_propagation_predict_error(): - """Test exception in AffinityPropagation.predict""" + # Test exception in AffinityPropagation.predict # Not fitted. af = AffinityPropagation(affinity="euclidean") assert_raises(ValueError, af.predict, X) diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 31513bfdfc641..9afcca5ff0eec 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -55,7 +55,7 @@ def test_get_submatrix(): def _test_shape_indices(model): - """Test get_shape and get_indices on fitted model.""" + # Test get_shape and get_indices on fitted model. for i in range(model.n_clusters): m, n = model.get_shape(i) i_ind, j_ind = model.get_indices(i) @@ -64,7 +64,7 @@ def _test_shape_indices(model): def test_spectral_coclustering(): - """Test Dhillon's Spectral CoClustering on a simple problem.""" + # Test Dhillon's Spectral CoClustering on a simple problem. param_grid = {'svd_method': ['randomized', 'arpack'], 'n_svd_vecs': [None, 20], 'mini_batch': [False, True], @@ -93,7 +93,7 @@ def test_spectral_coclustering(): def test_spectral_biclustering(): - """Test Kluger methods on a checkerboard dataset.""" + # Test Kluger methods on a checkerboard dataset. S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5, random_state=0) diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 3f51769482df3..41d915b74fc9d 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -22,7 +22,7 @@ def test_n_samples_leaves_roots(): - """Sanity check for the number of samples in leaves and roots""" + # Sanity check for the number of samples in leaves and roots X, y = make_blobs(n_samples=10) brc = Birch() brc.fit(X) @@ -34,7 +34,7 @@ def test_n_samples_leaves_roots(): def test_partial_fit(): - """Test that fit is equivalent to calling partial_fit multiple times""" + # Test that fit is equivalent to calling partial_fit multiple times X, y = make_blobs(n_samples=100) brc = Birch(n_clusters=3) brc.fit(X) @@ -52,7 +52,7 @@ def test_partial_fit(): def test_birch_predict(): - """Test the predict method predicts the nearest centroid.""" + # Test the predict method predicts the nearest centroid. 
rng = np.random.RandomState(0) X = generate_clustered_data(n_clusters=3, n_features=3, n_samples_per_cluster=10) @@ -70,7 +70,7 @@ def test_birch_predict(): def test_n_clusters(): - """Test that n_clusters param works properly""" + # Test that n_clusters param works properly X, y = make_blobs(n_samples=100, centers=10) brc1 = Birch(n_clusters=10) brc1.fit(X) @@ -96,7 +96,7 @@ def test_n_clusters(): def test_sparse_X(): - """Test that sparse and dense data give same results""" + # Test that sparse and dense data give the same results X, y = make_blobs(n_samples=100, centers=10) brc = Birch(n_clusters=10) brc.fit(X) @@ -119,7 +119,7 @@ def check_branching_factor(node, branching_factor): def test_branching_factor(): - """Test that nodes have at max branching_factor number of subclusters""" + # Test that nodes have at most branching_factor subclusters X, y = make_blobs() branching_factor = 9 @@ -149,7 +149,7 @@ def check_threshold(birch_instance, threshold): def test_threshold(): - """Test that the leaf subclusters have a threshold lesser than radius""" + # Test that the leaf subclusters have a threshold less than the radius X, y = make_blobs(n_samples=80, centers=4) brc = Birch(threshold=0.5, n_clusters=None) brc.fit(X) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index be53b85a0e701..dc5dce4d8edee 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -25,7 +25,7 @@ def test_dbscan_similarity(): - """Tests the DBSCAN algorithm with a similarity array.""" + # Tests the DBSCAN algorithm with a similarity array. # Parameters chosen specifically for this task. eps = 0.15 min_samples = 10 @@ -48,7 +48,7 @@ def test_dbscan_similarity(): def test_dbscan_feature(): - """Tests the DBSCAN algorithm with a feature vector array.""" + # Tests the DBSCAN algorithm with a feature vector array. # Parameters chosen specifically for this task. # Different eps to other test, because distance is not normalised. eps = 0.8 @@ -91,7 +91,7 @@ def test_dbscan_no_core_samples(): def test_dbscan_callable(): - """Tests the DBSCAN algorithm with a callable metric.""" + # Tests the DBSCAN algorithm with a callable metric. # Parameters chosen specifically for this task. # Different eps to other test, because distance is not normalised. eps = 0.8 @@ -117,7 +117,7 @@ def test_dbscan_callable(): def test_dbscan_balltree(): - """Tests the DBSCAN algorithm with balltree for neighbor calculation.""" + # Tests the DBSCAN algorithm with balltree for neighbor calculation. eps = 0.8 min_samples = 10 @@ -156,13 +156,13 @@ def test_dbscan_balltree(): def test_input_validation(): - """DBSCAN.fit should accept a list of lists.""" + # DBSCAN.fit should accept a list of lists. X = [[1., 2.], [3., 4.]] DBSCAN().fit(X) # must not raise exception def test_dbscan_badargs(): - """Test bad argument values: these should all raise ValueErrors""" + # Test bad argument values: these should all raise ValueErrors assert_raises(ValueError, dbscan, X, eps=-1.0) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 8733fd6e05ada..8cfba9822e764 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -58,9 +58,7 @@ def test_linkage_misc(): def test_structured_linkage_tree(): - """ - Check that we obtain the correct solution for structured linkage trees. - """ + # Check that we obtain the correct solution for structured linkage trees. 
rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) # Avoiding a mask with only 'True' entries @@ -82,9 +80,7 @@ def test_structured_linkage_tree(): def test_unstructured_linkage_tree(): - """ - Check that we obtain the correct solution for unstructured linkage trees. - """ + # Check that we obtain the correct solution for unstructured linkage trees. rng = np.random.RandomState(0) X = rng.randn(50, 100) for this_X in (X, X[0]): @@ -107,9 +103,7 @@ def test_unstructured_linkage_tree(): def test_height_linkage_tree(): - """ - Check that the height of the results of linkage tree is sorted. - """ + # Check that the heights of the linkage tree results are sorted. rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) X = rng.randn(50, 100) @@ -121,10 +115,8 @@ def test_height_linkage_tree(): def test_agglomerative_clustering(): - """ - Check that we obtain the correct number of clusters with - agglomerative clustering. - """ + # Check that we obtain the correct number of clusters with + # agglomerative clustering. rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) n_samples = 100 @@ -211,9 +203,7 @@ def test_agglomerative_clustering(): def test_ward_agglomeration(): - """ - Check that we obtain the correct solution in a simplistic case - """ + # Check that we obtain the correct solution in a simplistic case rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) X = rng.randn(50, 100) @@ -245,8 +235,7 @@ def assess_same_labelling(cut1, cut2): def test_scikit_vs_scipy(): - """Test scikit linkage with full connectivity (i.e. unstructured) vs scipy - """ + # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy n, p, k = 10, 5, 3 rng = np.random.RandomState(0) @@ -273,10 +262,8 @@ def test_scikit_vs_scipy(): def test_connectivity_propagation(): - """ - Check that connectivity in the ward tree is propagated correctly during - merging. - """ + # Check that connectivity in the ward tree is propagated correctly during + # merging. X = np.array([(.014, .120), (.014, .099), (.014, .097), (.017, .153), (.017, .153), (.018, .153), (.018, .153), (.018, .153), (.018, .153), @@ -291,10 +278,8 @@ def test_connectivity_propagation(): def test_ward_tree_children_order(): - """ - Check that children are ordered in the same way for both structured and - unstructured versions of ward_tree. - """ + # Check that children are ordered in the same way for both structured and + # unstructured versions of ward_tree. # test on five random datasets n, p = 10, 5 @@ -313,7 +298,7 @@ def test_ward_tree_children_order(): def test_ward_linkage_tree_return_distance(): - """Test return_distance option on linkage and ward trees""" + # Test return_distance option on linkage and ward trees # test that return_distance when set true, gives same # output on both structured and unstructured clustering. @@ -420,10 +405,8 @@ def test_ward_linkage_tree_return_distance(): def test_connectivity_fixing_non_lil(): - """ - Check non regression of a bug if a non item assignable connectivity is - provided with more than one component. - """ + # Check non-regression of a bug when a connectivity matrix that does not + # support item assignment is provided with more than one component. 
# create dummy data x = np.array([[0, 0], [1, 1]]) # create a mask with several components to force connectivity fixing @@ -475,7 +458,7 @@ def test_connectivity_ignores_diagonal(): def test_compute_full_tree(): - """Test that the full tree is computed if n_clusters is small""" + # Test that the full tree is computed if n_clusters is small rng = np.random.RandomState(0) X = rng.randn(10, 2) connectivity = kneighbors_graph(X, 5, include_self=False) @@ -502,7 +485,7 @@ def test_compute_full_tree(): def test_n_components(): - """Test n_components returned by linkage, average and ward tree""" + # Test n_components returned by linkage, average and ward tree rng = np.random.RandomState(0) X = rng.rand(5, 5) diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 75015bd678e1f..f52280c13d9cc 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -81,7 +81,7 @@ def test_labels_assignment_and_inertia(): def test_minibatch_update_consistency(): - """Check that dense and sparse minibatch update give the same results""" + # Check that dense and sparse minibatch update give the same results rng = np.random.RandomState(42) old_centers = centers + rng.normal(size=centers.shape) @@ -480,7 +480,7 @@ def test_mini_match_k_means_invalid_init(): def test_k_means_copyx(): - """Check if copy_x=False returns nearly equal X after de-centering.""" + # Check if copy_x=False returns nearly equal X after de-centering. my_X = X.copy() km = KMeans(copy_x=False, n_clusters=n_clusters, random_state=42) km.fit(my_X) @@ -491,13 +491,11 @@ def test_k_means_copyx(): def test_k_means_non_collapsed(): - """Check k_means with a bad initialization does not yield a singleton - - Starting with bad centers that are quickly ignored should not - result in a repositioning of the centers to the center of mass that - would lead to collapsed centers which in turns make the clustering - dependent of the numerical unstabilities. - """ + # Check k_means with a bad initialization does not yield a singleton + # Starting with bad centers that are quickly ignored should not + # result in a repositioning of the centers to the center of mass that + # would lead to collapsed centers, which in turn would make the clustering + # dependent on numerical instabilities. 
my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]]) array_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]]) km = KMeans(init=array_init, n_clusters=3, random_state=42, n_init=1) @@ -630,7 +628,7 @@ def test_fit_transform(): def test_n_init(): - """Check that increasing the number of init increases the quality""" + # Check that increasing the number of inits increases the quality n_runs = 5 n_init_range = [1, 5, 10] inertia = np.zeros((len(n_init_range), n_runs)) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index 736952d0f181a..9aefa20897414 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -25,13 +25,13 @@ def test_estimate_bandwidth(): - """Test estimate_bandwidth""" + # Test estimate_bandwidth bandwidth = estimate_bandwidth(X, n_samples=200) assert_true(0.9 <= bandwidth <= 1.5) def test_mean_shift(): - """ Test MeanShift algorithm """ + # Test MeanShift algorithm bandwidth = 1.2 ms = MeanShift(bandwidth=bandwidth) @@ -47,7 +47,7 @@ def test_mean_shift(): def test_meanshift_predict(): - """Test MeanShift.predict""" + # Test MeanShift.predict ms = MeanShift(bandwidth=1.2) labels = ms.fit_predict(X) labels2 = ms.predict(X) @@ -62,17 +62,15 @@ def test_meanshift_all_orphans(): def test_unfitted(): - """Non-regression: before fit, there should be not fitted attributes.""" + # Non-regression: before fit, there should be no fitted attributes. ms = MeanShift() assert_false(hasattr(ms, "cluster_centers_")) assert_false(hasattr(ms, "labels_")) def test_bin_seeds(): - """ - Test the bin seeding technique which can be used in the mean shift - algorithm - """ + # Test the bin seeding technique which can be used in the mean shift + # algorithm # Data is just 6 points in the plane X = np.array([[1., 1.], [1.4, 1.4], [1.8, 1.2], [2., 1.], [2.1, 1.1], [0., 0.]]) diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 412f0a0211353..c3810814bc17e 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -162,7 +162,7 @@ def test_affinities(): assert_equal((X.shape[0],), labels.shape) def histogram(x, y, **kwargs): - """Histogram kernel implemented as a callable.""" + # Histogram kernel implemented as a callable. assert_equal(kwargs, {}) # no kernel_params that we didn't ask for return np.minimum(x, y).sum() diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index e2888fe99228e..188e4930e9175 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -23,9 +23,7 @@ def test_covariance(): - """Tests Covariance module on a simple dataset. - - """ + # Tests Covariance module on a simple dataset. # test covariance fit from data cov = EmpiricalCovariance() cov.fit(X) @@ -76,9 +74,7 @@ def test_covariance(): def test_shrunk_covariance(): - """Tests ShrunkCovariance module on a simple dataset. - - """ + # Tests ShrunkCovariance module on a simple dataset. # compare shrunk covariance obtained from data and from MLE estimate cov = ShrunkCovariance(shrinkage=0.5) cov.fit(X) @@ -110,9 +106,7 @@ def test_shrunk_covariance(): def test_ledoit_wolf(): - """Tests LedoitWolf module on a simple dataset. - - """ + # Tests LedoitWolf module on a simple dataset. 
# test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) lw = LedoitWolf(assume_centered=True) @@ -197,9 +191,7 @@ def test_ledoit_wolf(): def test_oas(): - """Tests OAS module on a simple dataset. - - """ + # Tests OAS module on a simple dataset. # test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) oa = OAS(assume_centered=True) @@ -231,7 +223,7 @@ def test_oas(): assert_almost_equal(oa.score(X_centered), score_, 4) assert(oa.precision_ is None) - ### Same tests without assuming centered data + # Same tests without assuming centered data # test shrinkage coeff on a simple data set oa = OAS() oa.fit(X) diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index d08c6291a9695..63e92fb6af803 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -21,10 +21,8 @@ def test_mcd(): - """Tests the FastMCD algorithm implementation - - """ - ### Small data set + # Tests the FastMCD algorithm implementation + # Small data set # test without outliers (random independent normal data) launch_mcd_on_dataset(100, 5, 0, 0.01, 0.1, 80) # test with a contaminated data set (medium contamination) @@ -32,13 +30,13 @@ def test_mcd(): # test with a contaminated data set (strong contamination) launch_mcd_on_dataset(100, 5, 40, 0.1, 0.1, 50) - ### Medium data set + # Medium data set launch_mcd_on_dataset(1000, 5, 450, 0.1, 0.1, 540) - ### Large data set + # Large data set launch_mcd_on_dataset(1700, 5, 800, 0.1, 0.1, 870) - ### 1D data set + # 1D data set launch_mcd_on_dataset(500, 1, 100, 0.001, 0.001, 350) diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 9efd4732206f9..84748bf8d2cc9 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -236,7 +236,7 @@ def test_PLSSVD(): def test_univariate_pls_regression(): - """Ensure 1d Y is correctly interpreted""" + # Ensure 1d Y is correctly interpreted d = load_linnerud() X = d.data Y = d.target diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index f1c7e2dab1090..129e3c2609ef9 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -16,8 +16,7 @@ def test_factor_analysis(): - """Test FactorAnalysis ability to recover the data covariance structure - """ + # Test FactorAnalysis ability to recover the data covariance structure rng = np.random.RandomState(0) n_samples, n_features, n_components = 20, 5, 3 diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 0bd7c83d1b5fc..66963701d15bd 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -38,9 +38,7 @@ def center_and_norm(x, axis=-1): def test_gs(): - """ - Test gram schmidt orthonormalization - """ + # Test Gram-Schmidt orthonormalization # generate a random orthogonal matrix rng = np.random.RandomState(0) W, _, _ = np.linalg.svd(rng.randn(10, 10)) @@ -54,8 +52,7 @@ def test_gs(): def test_fastica_simple(add_noise=False): - """ Test the FastICA algorithm on very simple data. - """ + # Test the FastICA algorithm on very simple data. 
rng = np.random.RandomState(0) # scipy.stats uses the global RNG: np.random.seed(0) @@ -146,8 +143,7 @@ def test_fastica_nowhiten(): def test_non_square_fastica(add_noise=False): - """ Test the FastICA algorithm on very simple data. - """ + # Test the FastICA algorithm on very simple data. rng = np.random.RandomState(0) n_samples = 1000 @@ -190,7 +186,7 @@ def test_non_square_fastica(add_noise=False): def test_fit_transform(): - """Test FastICA.fit_transform""" + # Test FastICA.fit_transform rng = np.random.RandomState(0) X = rng.random_sample((100, 10)) for whiten, n_components in [[True, 5], [False, None]]: @@ -211,7 +207,7 @@ def test_fit_transform(): def test_inverse_transform(): - """Test FastICA.inverse_transform""" + # Test FastICA.inverse_transform n_features = 10 n_samples = 100 n1, n2 = 5, 10 diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index db75dfe7f9960..ae453d5f85f1c 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -12,7 +12,7 @@ def test_incremental_pca(): - """Incremental PCA on dense arrays.""" + # Incremental PCA on dense arrays. X = iris.data batch_size = X.shape[0] // 3 ipca = IncrementalPCA(n_components=2, batch_size=batch_size) @@ -35,7 +35,7 @@ def test_incremental_pca(): def test_incremental_pca_check_projection(): - """Test that the projection of data is correct.""" + # Test that the projection of data is correct. rng = np.random.RandomState(1999) n, p = 100, 3 X = rng.randn(n, p) * .1 @@ -56,7 +56,7 @@ def test_incremental_pca_check_projection(): def test_incremental_pca_inverse(): - """Test that the projection of data can be inverted.""" + # Test that the projection of data can be inverted. rng = np.random.RandomState(1999) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -72,7 +72,7 @@ def test_incremental_pca_inverse(): def test_incremental_pca_validation(): - """Test that n_components is >=1 and <= n_features.""" + # Test that n_components is >=1 and <= n_features. X = [[0, 1], [1, 0]] for n_components in [-1, 0, .99, 3]: assert_raises(ValueError, IncrementalPCA(n_components, @@ -80,7 +80,7 @@ def test_incremental_pca_validation(): def test_incremental_pca_set_params(): - """Test that components_ sign is stable over batch sizes.""" + # Test that changing n_components with set_params raises an error. rng = np.random.RandomState(1999) n_samples = 100 n_features = 20 @@ -101,7 +101,7 @@ def test_incremental_pca_set_params(): def test_incremental_pca_num_features_change(): - """Test that changing n_components will raise an error.""" + # Test that changing n_features will raise an error. rng = np.random.RandomState(1999) n_samples = 100 X = rng.randn(n_samples, 20) @@ -112,7 +112,7 @@ def test_incremental_pca_num_features_change(): def test_incremental_pca_batch_signs(): - """Test that components_ sign is stable over batch sizes.""" + # Test that components_ sign is stable over batch sizes. rng = np.random.RandomState(1999) n_samples = 100 n_features = 3 @@ -128,7 +128,7 @@ def test_incremental_pca_batch_signs(): def test_incremental_pca_batch_values(): - """Test that components_ values are stable over batch sizes.""" + # Test that components_ values are stable over batch sizes. 
rng = np.random.RandomState(1999) n_samples = 100 n_features = 3 @@ -144,7 +144,7 @@ def test_incremental_pca_batch_values(): def test_incremental_pca_partial_fit(): - """Test that fit and partial_fit get equivalent results.""" + # Test that fit and partial_fit get equivalent results. rng = np.random.RandomState(1999) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -164,7 +164,7 @@ def test_incremental_pca_partial_fit(): def test_incremental_pca_against_pca_iris(): - """Test that IncrementalPCA and PCA are approximate (to a sign flip).""" + # Test that IncrementalPCA and PCA are approximate (to a sign flip). X = iris.data Y_pca = PCA(n_components=2).fit_transform(X) @@ -174,7 +174,7 @@ def test_incremental_pca_against_pca_iris(): def test_incremental_pca_against_pca_random_data(): - """Test that IncrementalPCA and PCA are approximate (to a sign flip).""" + # Test that IncrementalPCA and PCA are approximate (to a sign flip). rng = np.random.RandomState(1999) n_samples = 100 n_features = 3 @@ -187,7 +187,7 @@ def test_incremental_pca_against_pca_random_data(): def test_explained_variances(): - """Test that PCA and IncrementalPCA calculations match""" + # Test that PCA and IncrementalPCA calculations match X = datasets.make_low_rank_matrix(1000, 100, tail_strength=0., effective_rank=10, random_state=1999) prec = 3 @@ -204,7 +204,7 @@ def test_explained_variances(): def test_whitening(): - """Test that PCA and IncrementalPCA transforms match to sign flip.""" + # Test that PCA and IncrementalPCA transforms match to sign flip. X = datasets.make_low_rank_matrix(1000, 10, tail_strength=0., effective_rank=2, random_state=1999) prec = 3 diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index a86c33a9e9937..9cd9adbc2bb1d 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -19,7 +19,7 @@ def test_kernel_pca(): X_pred = rng.random_sample((2, 4)) def histogram(x, y, **kwargs): - """Histogram kernel implemented as a callable.""" + # Histogram kernel implemented as a callable. 
assert_equal(kwargs, {}) # no kernel_params that we didn't ask for return np.minimum(x, y).sum() @@ -78,8 +78,8 @@ def test_kernel_pca_sparse(): X_fit_transformed.shape[1]) # inverse transform - #X_pred2 = kpca.inverse_transform(X_pred_transformed) - #assert_equal(X_pred2.shape, X_pred.shape) + # X_pred2 = kpca.inverse_transform(X_pred_transformed) + # assert_equal(X_pred2.shape, X_pred.shape) def test_kernel_pca_linear_kernel(): @@ -187,7 +187,7 @@ def test_gridsearch_pipeline_precomputed(): def test_nested_circles(): - """Test the linear separability of the first 2D KPCA transform""" + # Test the linear separability of the first 2D KPCA transform X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0) diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 8476e7e313007..4793935bdce45 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -15,12 +15,12 @@ @raises(ValueError) def test_initialize_nn_input(): - """Test NNDSVD behaviour on negative input""" + # Test NNDSVD behaviour on negative input nmf._initialize_nmf(-np.ones((2, 2)), 2) def test_initialize_nn_output(): - """Test that NNDSVD does not return negative values""" + # Test that NNDSVD does not return negative values data = np.abs(random_state.randn(10, 10)) for var in (None, 'a', 'ar'): W, H = nmf._initialize_nmf(data, 10, random_state=0) @@ -28,11 +28,9 @@ def test_initialize_nn_output(): def test_initialize_close(): - """Test NNDSVD error - - Test that _initialize_nmf error is less than the standard deviation of the - entries in the matrix. - """ + # Test NNDSVD error + # Test that _initialize_nmf error is less than the standard deviation of + # the entries in the matrix. A = np.abs(random_state.randn(10, 10)) W, H = nmf._initialize_nmf(A, 10) error = linalg.norm(np.dot(W, H) - A) @@ -41,11 +39,9 @@ def test_initialize_close(): def test_initialize_variants(): - """Test NNDSVD variants correctness - - Test that the variants 'a' and 'ar' differ from basic NNDSVD only where - the basic version has zeros. - """ + # Test NNDSVD variants correctness + # Test that the variants 'a' and 'ar' differ from basic NNDSVD only where + # the basic version has zeros. 
data = np.abs(random_state.randn(10, 10)) W0, H0 = nmf._initialize_nmf(data, 10, variant=None) Wa, Ha = nmf._initialize_nmf(data, 10, variant='a') @@ -57,14 +53,14 @@ def test_initialize_variants(): @raises(ValueError) def test_projgrad_nmf_fit_nn_input(): - """Test model fit behaviour on negative input""" + # Test model fit behaviour on negative input A = -np.ones((2, 2)) m = nmf.ProjectedGradientNMF(n_components=2, init=None, random_state=0) m.fit(A) def test_projgrad_nmf_fit_nn_output(): - """Test that the decomposition does not contain negative values""" + # Test that the decomposition does not contain negative values A = np.c_[5 * np.ones(5) - np.arange(1, 6), 5 * np.ones(5) + np.arange(1, 6)] for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar'): @@ -76,21 +72,21 @@ def test_projgrad_nmf_fit_nn_output(): def test_projgrad_nmf_fit_close(): - """Test that the fit is not too far away""" + # Test that the fit is not too far away pnmf = nmf.ProjectedGradientNMF(5, init='nndsvda', random_state=0) X = np.abs(random_state.randn(6, 5)) assert_less(pnmf.fit(X).reconstruction_err_, 0.05) def test_nls_nn_output(): - """Test that NLS solver doesn't return negative values""" + # Test that NLS solver doesn't return negative values A = np.arange(1, 5).reshape(1, -1) Ap, _, _ = nmf._nls_subproblem(np.dot(A.T, -A), A.T, A, 0.001, 100) assert_false((Ap < 0).any()) def test_nls_close(): - """Test that the NLS results should be close""" + # Test that the NLS results should be close A = np.arange(1, 5).reshape(1, -1) Ap, _, _ = nmf._nls_subproblem(np.dot(A.T, A), A.T, np.zeros_like(A), 0.001, 100) @@ -98,10 +94,8 @@ def test_nls_close(): def test_projgrad_nmf_transform(): - """Test that NMF.transform returns close values - - (transform uses scipy.optimize.nnls for now) - """ + # Test that NMF.transform returns close values + # (transform uses scipy.optimize.nnls for now) A = np.abs(random_state.randn(6, 5)) m = nmf.ProjectedGradientNMF(n_components=5, init='nndsvd', random_state=0) transf = m.fit_transform(A) @@ -109,18 +103,16 @@ def test_projgrad_nmf_transform(): def test_n_components_greater_n_features(): - """Smoke test for the case of more components than features.""" + # Smoke test for the case of more components than features. A = np.abs(random_state.randn(30, 10)) nmf.ProjectedGradientNMF(n_components=15, sparseness='data', random_state=0).fit(A) def test_projgrad_nmf_sparseness(): - """Test sparseness - - Test that sparsity constraints actually increase sparseness in the - part where they are applied. - """ + # Test sparseness + # Test that sparsity constraints actually increase sparseness in the + # part where they are applied. A = np.abs(random_state.randn(10, 10)) m = nmf.ProjectedGradientNMF(n_components=5, random_state=0).fit(A) data_sp = nmf.ProjectedGradientNMF(n_components=5, sparseness='data', @@ -132,7 +124,7 @@ def test_projgrad_nmf_sparseness(): def test_sparse_input(): - """Test that sparse matrices are accepted as input""" + # Test that sparse matrices are accepted as input from scipy.sparse import csc_matrix A = np.abs(random_state.randn(10, 10)) @@ -160,7 +152,7 @@ def test_sparse_input(): def test_sparse_transform(): - """Test that transform works on sparse data. Issue #2124""" + # Test that transform works on sparse data. 
Issue #2124 from scipy.sparse import csc_matrix A = np.abs(random_state.randn(5, 4)) diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index b31ca4d680404..34fe795d2b198 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -17,7 +17,7 @@ def test_pca(): - """PCA on dense arrays""" + # PCA on dense arrays pca = PCA(n_components=2) X = iris.data X_r = pca.fit(X).transform(X) @@ -47,7 +47,7 @@ def test_pca(): def test_whitening(): - """Check that PCA output has unit-variance""" + # Check that PCA output has unit-variance rng = np.random.RandomState(0) n_samples = 100 n_features = 80 @@ -93,7 +93,7 @@ def test_whitening(): def test_explained_variance(): - """Check that PCA output has unit-variance""" + # Check that the explained variances are computed correctly rng = np.random.RandomState(0) n_samples = 100 n_features = 80 @@ -118,7 +118,7 @@ def test_explained_variance(): def test_pca_check_projection(): - """Test that the projection of data is correct""" + # Test that the projection of data is correct rng = np.random.RandomState(0) n, p = 100, 3 X = rng.randn(n, p) * .1 @@ -132,7 +132,7 @@ def test_pca_check_projection(): def test_pca_inverse(): - """Test that the projection of data can be inverted""" + # Test that the projection of data can be inverted rng = np.random.RandomState(0) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -161,7 +161,7 @@ def test_pca_validation(): def test_randomized_pca_check_projection(): - """Test that the projection by RandomizedPCA on dense data is correct""" + # Test that the projection by RandomizedPCA on dense data is correct rng = np.random.RandomState(0) n, p = 100, 3 X = rng.randn(n, p) * .1 @@ -175,7 +175,7 @@ def test_randomized_pca_check_projection(): def test_randomized_pca_check_list(): - """Test that the projection by RandomizedPCA on list data is correct""" + # Test that the projection by RandomizedPCA on list data is correct X = [[1.0, 0.0], [0.0, 1.0]] X_transformed = RandomizedPCA(n_components=1, random_state=0).fit(X).transform(X) @@ -185,7 +185,7 @@ def test_randomized_pca_check_list(): def test_randomized_pca_inverse(): - """Test that RandomizedPCA is inversible on dense data""" + # Test that RandomizedPCA is invertible on dense data rng = np.random.RandomState(0) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -209,7 +209,7 @@ def test_randomized_pca_inverse(): def test_pca_dim(): - """Check automated dimensionality setting""" + # Check automated dimensionality setting rng = np.random.RandomState(0) n, p = 100, 5 X = rng.randn(n, p) * .1 @@ -220,10 +220,8 @@ def test_pca_dim(): def test_infer_dim_1(): - """TODO: explain what this is testing - - Or at least use explicit variable names... - """ + # TODO: explain what this is testing + # Or at least use explicit variable names... n, p = 1000, 5 rng = np.random.RandomState(0) X = (rng.randn(n, p) * .1 + rng.randn(n, 1) * np.array([3, 4, 5, 1, 2]) @@ -239,10 +237,8 @@ def test_infer_dim_1(): def test_infer_dim_2(): - """TODO: explain what this is testing - - Or at least use explicit variable names... - """ + # TODO: explain what this is testing + # Or at least use explicit variable names... 
n, p = 1000, 5 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 @@ -255,8 +251,6 @@ def test_infer_dim_2(): def test_infer_dim_3(): - """ - """ n, p = 100, 5 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 @@ -290,7 +284,7 @@ def test_infer_dim_by_explained_variance(): def test_pca_score(): - """Test that probabilistic PCA scoring yields a reasonable score""" + # Test that probabilistic PCA scoring yields a reasonable score n, p = 1000, 3 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) @@ -302,7 +296,7 @@ def test_pca_score(): def test_pca_score2(): - """Test that probabilistic PCA correctly separated different datasets""" + # Test that probabilistic PCA correctly separates different datasets n, p = 100, 3 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) @@ -320,7 +314,7 @@ def test_pca_score2(): def test_pca_score3(): - """Check that probabilistic PCA selects the right model""" + # Check that probabilistic PCA selects the right model n, p = 200, 3 rng = np.random.RandomState(0) Xl = (rng.randn(n, p) + rng.randn(n, 1) * np.array([3, 4, 5]) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 96cee5a690331..2e2eba08b7696 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -89,10 +89,8 @@ def test_fit_transform_parallel(): def test_transform_nan(): - """ - Test that SparsePCA won't return NaN when there is 0 feature in all - samples. - """ + # Test that SparsePCA won't return NaN when a feature is 0 in all + # samples. rng = np.random.RandomState(0) Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array Y[:, 0] = 0 diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index bc82ac2f9e2be..36eb50a78ce33 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -50,7 +50,7 @@ def test_classification(): - """Check classification for various parameter settings.""" + # Check classification for various parameter settings. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, @@ -73,7 +73,7 @@ def test_classification(): def test_sparse_classification(): - """Check classification for various parameter settings on sparse input.""" + # Check classification for various parameter settings on sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set""" @@ -132,7 +132,7 @@ def fit(self, X, y): def test_regression(): - """Check regression for various parameter settings.""" + # Check regression for various parameter settings. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], boston.target[:50], @@ -154,7 +154,7 @@ def test_regression(): def test_sparse_regression(): - """Check regression for various parameter settings on sparse input.""" + # Check regression for various parameter settings on sparse input. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], boston.target[:50], @@ -214,7 +214,7 @@ def fit(self, X, y): def test_bootstrap_samples(): - """Test that bootstraping samples generate non-perfect base estimators.""" + # Test that bootstrapping samples generates non-perfect base estimators. 
rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -242,7 +242,7 @@ def test_bootstrap_samples(): def test_bootstrap_features(): - """Test that bootstraping features may generate dupplicate features.""" + # Test that bootstrapping features may generate duplicate features. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -266,7 +266,7 @@ def test_bootstrap_features(): def test_probability(): - """Predict probabilities.""" + # Predict probabilities. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, @@ -298,8 +298,8 @@ def test_probability(): def test_oob_score_classification(): - """Check that oob prediction is a good estimation of the generalization - error.""" + # Check that oob prediction is a good estimation of the generalization + # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, @@ -328,8 +328,8 @@ def test_oob_score_classification(): def test_oob_score_regression(): - """Check that oob prediction is a good estimation of the generalization - error.""" + # Check that oob prediction is a good estimation of the generalization + # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -357,7 +357,7 @@ def test_oob_score_regression(): def test_single_estimator(): - """Check singleton ensembles.""" + # Check singleton ensembles. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -375,7 +375,7 @@ def test_single_estimator(): def test_error(): - """Test that it gives proper exception on deficient input.""" + # Test that it gives a proper exception on deficient input. X, y = iris.data, iris.target base = DecisionTreeClassifier() @@ -408,7 +408,7 @@ def test_error(): def test_parallel_classification(): - """Check parallel classification.""" + # Check parallel classification. rng = check_random_state(0) # Classification @@ -454,7 +454,7 @@ def test_parallel_classification(): def test_parallel_regression(): - """Check parallel regression.""" + # Check parallel regression. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, @@ -480,7 +480,7 @@ def test_parallel_regression(): def test_gridsearch(): - """Check that bagging ensembles can be grid-searched.""" + # Check that bagging ensembles can be grid-searched. # Transform iris into a binary classification task X, y = iris.data, iris.target y[y == 2] = 1 @@ -495,7 +495,7 @@ def test_gridsearch(): def test_base_estimator(): - """Check base_estimator and its default values.""" + # Check base_estimator and its default values. rng = check_random_state(0) # Classification diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 9a1765819b1aa..0268715cde9ef 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -15,7 +15,7 @@ def test_base(): - """Check BaseEnsemble methods.""" + # Check BaseEnsemble methods. ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=3) iris = load_iris() @@ -34,8 +34,8 @@ def test_base(): def test_base_zero_n_estimators(): - """Check that instantiating a BaseEnsemble with n_estimators<=0 raises - a ValueError.""" + # Check that instantiating a BaseEnsemble with n_estimators<=0 raises + # a ValueError. 
ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=0) iris = load_iris() assert_raise_message(ValueError, diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index cd0697af20500..33aa5cb3e4050 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -105,7 +105,7 @@ def test_classification_toy(): def check_iris_criterion(name, criterion): - """Check consistency on dataset iris.""" + # Check consistency on dataset iris. ForestClassifier = FOREST_CLASSIFIERS[name] clf = ForestClassifier(n_estimators=10, criterion=criterion, @@ -129,7 +129,7 @@ def test_iris(): def check_boston_criterion(name, criterion): - """Check consistency on dataset boston house prices.""" + # Check consistency on dataset boston house prices. ForestRegressor = FOREST_REGRESSORS[name] clf = ForestRegressor(n_estimators=5, criterion=criterion, random_state=1) @@ -152,7 +152,7 @@ def test_boston(): def check_regressor_attributes(name): - """Regression models should not have a classes_ attribute.""" + # Regression models should not have a classes_ attribute. r = FOREST_REGRESSORS[name](random_state=0) assert_false(hasattr(r, "classes_")) assert_false(hasattr(r, "n_classes_")) @@ -168,7 +168,7 @@ def test_regressor_attributes(): def check_probability(name): - """Predict probabilities.""" + # Predict probabilities. ForestClassifier = FOREST_CLASSIFIERS[name] with np.errstate(divide="ignore"): clf = ForestClassifier(n_estimators=10, random_state=1, max_features=1, @@ -186,7 +186,7 @@ def test_probability(): def check_importances(name, X, y): - """Check variable importances.""" + # Check variable importances. ForestClassifier = FOREST_CLASSIFIERS[name] for n_jobs in [1, 2]: @@ -236,8 +236,8 @@ def test_unfitted_feature_importances(): def check_oob_score(name, X, y, n_estimators=20): - """Check that oob prediction is a good estimation of the generalization - error.""" + # Check that oob prediction is a good estimation of the generalization + # error. # Proper behavior est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=n_estimators, bootstrap=True) @@ -304,7 +304,7 @@ def check_gridsearch(name): def test_gridsearch(): - """Check that base trees can be grid-searched.""" + # Check that base trees can be grid-searched. for name in FOREST_CLASSIFIERS: yield check_gridsearch, name @@ -333,7 +333,7 @@ def test_parallel(): def check_pickle(name, X, y): - """Check pickability.""" + # Check picklability. ForestEstimator = FOREST_ESTIMATORS[name] obj = ForestEstimator(random_state=0) @@ -356,7 +356,7 @@ def test_pickle(): def check_multioutput(name): - """Check estimators on multi-output problems.""" + # Check estimators on multi-output problems. X_train = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-2, 1], [-1, 1], [-1, 2], [2, -1], [1, -1], [1, -2]] @@ -391,7 +391,7 @@ def test_multioutput(): def check_classes_shape(name): - """Test that n_classes_ and classes_ have proper shape.""" + # Test that n_classes_ and classes_ have proper shape. ForestClassifier = FOREST_CLASSIFIERS[name] # Classification, single output @@ -554,7 +554,7 @@ def test_distribution(): def check_max_leaf_nodes_max_depth(name, X, y): - """Test precedence of max_leaf_nodes over max_depth. """ + # Test precedence of max_leaf_nodes over max_depth. 
ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(max_depth=1, max_leaf_nodes=4, n_estimators=1).fit(X, y) @@ -571,7 +571,7 @@ def test_max_leaf_nodes_max_depth(): def check_min_samples_leaf(name, X, y): - """Test if leaves contain more than leaf_count training examples""" + # Test if leaves contain more than leaf_count training examples ForestEstimator = FOREST_ESTIMATORS[name] # test both DepthFirstTreeBuilder and BestFirstTreeBuilder @@ -597,8 +597,8 @@ def test_min_samples_leaf(): def check_min_weight_fraction_leaf(name, X, y): - """Test if leaves contain at least min_weight_fraction_leaf of the - training set""" + # Test if leaves contain at least min_weight_fraction_leaf of the + # training set ForestEstimator = FOREST_ESTIMATORS[name] rng = np.random.RandomState(0) weights = rng.rand(X.shape[0]) @@ -671,7 +671,7 @@ def test_sparse_input(): def check_memory_layout(name, dtype): - """Check that it works no matter the memory layout""" + # Check that it works no matter the memory layout est = FOREST_ESTIMATORS[name](random_state=0, bootstrap=False) @@ -746,7 +746,7 @@ def test_1d_input(): def check_class_weights(name): - """Check class_weights resemble sample_weights behavior.""" + # Check class_weights resemble sample_weights behavior. ForestClassifier = FOREST_CLASSIFIERS[name] # Iris is balanced, so no effect expected for using 'auto' weights @@ -794,7 +794,7 @@ def test_class_weights(): def check_class_weight_auto_and_bootstrap_multi_output(name): - """Test class_weight works for multi-output""" + # Test class_weight works for multi-output ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T clf = ForestClassifier(class_weight='auto', random_state=0) @@ -812,7 +812,7 @@ def test_class_weight_auto_and_bootstrap_multi_output(): def check_class_weight_errors(name): - """Test if class_weight raises errors and warnings when expected.""" + # Test if class_weight raises errors and warnings when expected. ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T @@ -842,8 +842,8 @@ def test_class_weight_errors(): def check_warm_start(name, random_state=42): - """Test if fitting incrementally with warm start gives a forest of the - right size and the same results as a normal fit.""" + # Test if fitting incrementally with warm start gives a forest of the + # right size and the same results as a normal fit. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf_ws = None @@ -874,8 +874,7 @@ def test_warm_start(): def check_warm_start_clear(name): - """Test if fit clears state and grows a new forest when warm_start==False. - """ + # Test if fit clears state and grows a new forest when warm_start==False. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=1, warm_start=False, @@ -897,7 +896,7 @@ def test_warm_start_clear(): def check_warm_start_smaller_n_estimators(name): - """Test if warm start second fit with smaller n_estimators raises error.""" + # Test if warm start second fit with smaller n_estimators raises error. 
X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=1, warm_start=True) @@ -912,8 +911,8 @@ def test_warm_start_smaller_n_estimators(): def check_warm_start_equal_n_estimators(name): - """Test if warm start with equal n_estimators does nothing and returns the - same forest and raises a warning.""" + # Test if warm start with equal n_estimators does nothing and returns the + # same forest and raises a warning. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, @@ -938,7 +937,7 @@ def test_warm_start_equal_n_estimators(): def check_warm_start_oob(name): - """Test that the warm start computes oob score when asked.""" + # Test that the warm start computes oob score when asked. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning. diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index c1133dc4c0d54..06aa68aaf4db4 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -46,7 +46,7 @@ def test_classification_toy(): - """Check classification on a toy dataset.""" + # Check classification on a toy dataset. for loss in ('deviance', 'exponential'): clf = GradientBoostingClassifier(loss=loss, n_estimators=10, @@ -64,7 +64,7 @@ def test_classification_toy(): def test_parameter_checks(): - """Check input parameter validation.""" + # Check input parameter validation. assert_raises(ValueError, GradientBoostingClassifier(n_estimators=0).fit, X, y) @@ -138,8 +138,8 @@ def test_loss_function(): def test_classification_synthetic(): - """Test GradientBoostingClassifier on synthetic dataset used by - Hastie et al. in ESLII Example 12.7. """ + # Test GradientBoostingClassifier on synthetic dataset used by + # Hastie et al. in ESLII Example 12.7. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] @@ -166,8 +166,8 @@ def test_classification_synthetic(): def test_boston(): - """Check consistency on dataset boston house prices with least squares - and least absolute deviation. """ + # Check consistency on dataset boston house prices with least squares + # and least absolute deviation. for loss in ("ls", "lad", "huber"): for subsample in (1.0, 0.5): last_y_pred = None @@ -196,7 +196,7 @@ def test_boston(): def test_iris(): - """Check consistency on dataset iris.""" + # Check consistency on dataset iris. for subsample in (1.0, 0.5): for sample_weight in (None, np.ones(len(iris.target))): clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', @@ -208,8 +208,8 @@ def test_iris(): def test_regression_synthetic(): - """Test on synthetic regression datasets used in Leo Breiman, - `Bagging Predictors?. Machine Learning 24(2): 123-140 (1996). """ + # Test on synthetic regression datasets used in Leo Breiman, + # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996). random_state = check_random_state(1) regression_params = {'n_estimators': 100, 'max_depth': 4, 'min_samples_split': 1, 'learning_rate': 0.1, @@ -266,7 +266,7 @@ def test_feature_importances(): def test_probability_log(): - """Predict probabilities.""" + # Predict probabilities. 
clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) @@ -285,7 +285,7 @@ def test_probability_log(): def test_check_inputs(): - """Test input checks (shape and type of X and y).""" + # Test input checks (shape and type of X and y). clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.fit, X, y + [0, 1]) @@ -303,7 +303,7 @@ def test_check_inputs(): def test_check_inputs_predict(): - """X has wrong shape """ + # X has wrong shape clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y) @@ -330,7 +330,7 @@ def test_check_inputs_predict(): def test_check_max_features(): - """test if max_features is valid. """ + # test if max_features is valid. clf = GradientBoostingRegressor(n_estimators=100, random_state=1, max_features=0) assert_raises(ValueError, clf.fit, X, y) @@ -344,7 +344,7 @@ def test_check_max_features(): assert_raises(ValueError, clf.fit, X, y) def test_max_feature_regression(): - """Test to make sure random state is set properly. """ + # Test to make sure random state is set properly. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] @@ -359,7 +359,7 @@ def test_max_feature_regression(): def test_max_feature_auto(): - """Test if max features is set properly for floats and str. """ + # Test if max features is set properly for floats and str. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) _, n_features = X.shape @@ -393,9 +393,8 @@ def test_max_feature_auto(): def test_staged_predict(): - """Test whether staged decision function eventually gives - the same prediction. - """ + # Test whether staged decision function eventually gives + # the same prediction. X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0) X_train, y_train = X[:200], y[:200] @@ -416,9 +415,8 @@ def test_staged_predict(): def test_staged_predict_proba(): - """Test whether staged predict proba eventually gives - the same prediction. - """ + # Test whether staged predict proba eventually gives + # the same prediction. X, y = datasets.make_hastie_10_2(n_samples=1200, random_state=1) X_train, y_train = X[:200], y[:200] @@ -463,7 +461,7 @@ def test_staged_functions_defensive(): def test_serialization(): - """Check model serialization.""" + # Check model serialization. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y) @@ -483,7 +481,7 @@ def test_serialization(): def test_degenerate_targets(): - """Check if we can fit even though all targets are equal. """ + # Check if we can fit even though all targets are equal. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) # classifier should raise exception @@ -497,7 +495,7 @@ def test_degenerate_targets(): def test_quantile_loss(): - """Check if quantile loss with alpha=0.5 equals lad. """ + # Check if quantile loss with alpha=0.5 equals lad. clf_quantile = GradientBoostingRegressor(n_estimators=100, loss='quantile', max_depth=4, alpha=0.5, random_state=7) @@ -514,7 +512,7 @@ def test_quantile_loss(): def test_symbol_labels(): - """Test with non-integer class labels. """ + # Test with non-integer class labels. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) symbol_y = tosequence(map(str, y)) @@ -525,7 +523,7 @@ def test_symbol_labels(): def test_float_class_labels(): - """Test with float class labels. """ + # Test with float class labels. 
clf = GradientBoostingClassifier(n_estimators=100, random_state=1) float_y = np.asarray(y, dtype=np.float32) @@ -537,7 +535,7 @@ def test_float_class_labels(): def test_shape_y(): - """Test with float class labels. """ + # Test with differently shaped y. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) y_ = np.asarray(y, dtype=np.int32) @@ -552,7 +550,7 @@ def test_shape_y(): def test_mem_layout(): - """Test with different memory layouts of X and y""" + # Test with different memory layouts of X and y X_ = np.asfortranarray(X) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) @@ -581,7 +579,7 @@ def test_mem_layout(): def test_oob_improvement(): - """Test if oob improvement has correct shape and regression test. """ + # Test if oob improvement has correct shape and regression test. clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5) clf.fit(X, y) @@ -593,7 +591,7 @@ def test_oob_improvement(): def test_oob_improvement_raise(): - """Test if oob improvement has correct shape. """ + # Test if oob improvement has correct shape. clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=1.0) clf.fit(X, y) @@ -601,7 +599,7 @@ def test_oob_improvement_raise(): def test_oob_multilcass_iris(): - """Check OOB improvement on multi-class dataset.""" + # Check OOB improvement on multi-class dataset. clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', random_state=1, subsample=0.5) clf.fit(iris.data, iris.target) @@ -618,7 +616,7 @@ def test_oob_multilcass_iris(): def test_verbose_output(): - """Check verbose=1 does not cause error. """ + # Check verbose=1 does not cause error. from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout @@ -643,7 +641,7 @@ def test_verbose_output(): def test_more_verbose_output(): - """Check verbose=2 does not cause error. """ + # Check verbose=2 does not cause error. from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout @@ -668,7 +666,7 @@ def test_more_verbose_output(): def test_warm_start(): - """Test if warm start equals fit. """ + # Test if warm start equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1) @@ -683,7 +681,7 @@ def test_warm_start(): def test_warm_start_n_estimators(): - """Test if warm start equals fit - set n_estimators. """ + # Test if warm start equals fit - set n_estimators. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=300, max_depth=1) @@ -698,7 +696,7 @@ def test_warm_start_n_estimators(): def test_warm_start_max_depth(): - """Test if possible to fit trees of different depth in ensemble. """ + # Test if possible to fit trees of different depth in ensemble. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -713,7 +711,7 @@ def test_warm_start_max_depth(): def test_warm_start_clear(): - """Test if fit clears state. """ + # Test if fit clears state. 
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) @@ -728,7 +726,7 @@ def test_warm_start_clear(): def test_warm_start_zero_n_estimators(): - """Test if warm start with zero n_estimators raises error """ + # Test if warm start with zero n_estimators raises error X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -738,7 +736,7 @@ def test_warm_start_zero_n_estimators(): def test_warm_start_smaller_n_estimators(): - """Test if warm start with smaller n_estimators raises error """ + # Test if warm start with smaller n_estimators raises error X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -748,7 +746,7 @@ def test_warm_start_smaller_n_estimators(): def test_warm_start_equal_n_estimators(): - """Test if warm start with equal n_estimators does nothing """ + # Test if warm start with equal n_estimators does nothing X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) @@ -762,7 +760,7 @@ def test_warm_start_equal_n_estimators(): def test_warm_start_oob_switch(): - """Test if oob can be turned on during warm start. """ + # Test if oob can be turned on during warm start. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -777,7 +775,7 @@ def test_warm_start_oob_switch(): def test_warm_start_oob(): - """Test if warm start OOB equals fit. """ + # Test if warm start OOB equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1, subsample=0.5, @@ -803,7 +801,7 @@ def early_stopping_monitor(i, est, locals): def test_monitor_early_stopping(): - """Test if monitor return value works. """ + # Test if monitor return value works. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: @@ -839,7 +837,7 @@ def test_monitor_early_stopping(): def test_complete_classification(): - """Test greedy trees with max_depth + 1 leafs. """ + # Test greedy trees with max_depth + 1 leaves. from sklearn.tree._tree import TREE_LEAF X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) k = 4 @@ -855,7 +853,7 @@ def test_complete_classification(): def test_complete_regression(): - """Test greedy trees with max_depth + 1 leafs. """ + # Test greedy trees with max_depth + 1 leaves. from sklearn.tree._tree import TREE_LEAF k = 4 @@ -869,7 +867,7 @@ def test_complete_regression(): def test_zero_estimator_reg(): - """Test if ZeroEstimator works for regression. """ + # Test if ZeroEstimator works for regression. est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init=ZeroEstimator()) est.fit(boston.data, boston.target) @@ -890,7 +888,7 @@ def test_zero_estimator_reg(): def test_zero_estimator_clf(): - """Test if ZeroEstimator works for classification. """ + # Test if ZeroEstimator works for classification. 
X = iris.data y = np.array(iris.target) est = GradientBoostingClassifier(n_estimators=20, max_depth=1, @@ -920,7 +918,7 @@ def test_zero_estimator_clf(): def test_max_leaf_nodes_max_depth(): - """Test preceedence of max_leaf_nodes over max_depth. """ + # Test precedence of max_leaf_nodes over max_depth. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) all_estimators = [GradientBoostingRegressor, GradientBoostingClassifier] @@ -937,10 +935,8 @@ def test_max_leaf_nodes_max_depth(): def test_warm_start_wo_nestimators_change(): - """Test if warm_start does nothing if n_estimators is not changed. - - Regression test for #3513. - """ + # Test if warm_start does nothing if n_estimators is not changed. + # Regression test for #3513. clf = GradientBoostingClassifier(n_estimators=10, warm_start=True) clf.fit([[0, 1], [2, 3]], [0, 1]) assert clf.estimators_.shape[0] == 10 @@ -949,7 +945,7 @@ def test_warm_start_wo_nestimators_change(): def test_probability_exponential(): - """Predict probabilities.""" + # Predict probabilities. clf = GradientBoostingClassifier(loss='exponential', n_estimators=100, random_state=1) diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index 1c811aeba9af4..c2b4806ec496b 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -19,10 +19,8 @@ def test_binomial_deviance(): - """Check binomial deviance loss. - - Check against alternative definitions in ESLII. - """ + # Check binomial deviance loss. + # Check against alternative definitions in ESLII. bd = BinomialDeviance(2) # pred has the same BD for y in {0, 1} @@ -55,7 +53,7 @@ def test_binomial_deviance(): def test_log_odds_estimator(): - """Check log odds estimator. """ + # Check log odds estimator. est = LogOddsEstimator() assert_raises(ValueError, est.fit, None, np.array([1])) @@ -78,7 +76,7 @@ def test_sample_weight_smoke(): def test_sample_weight_init_estimators(): - """Smoke test for init estimators with sample weights. """ + # Smoke test for init estimators with sample weights. rng = check_random_state(13) X = rng.rand(100, 2) sample_weight = np.ones(100) @@ -145,7 +143,7 @@ def test_weighted_percentile_zero_weight(): def test_sample_weight_deviance(): - """Test if deviance supports sample weights. """ + # Test if deviance supports sample weights. 
rng = check_random_state(13) X = rng.rand(100, 2) sample_weight = np.ones(100) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index feebbe2480c0e..cec7efc46f03b 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -28,7 +28,7 @@ def test_partial_dependence_classifier(): - """Test partial dependence for classifier """ + # Test partial dependence for classifier clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(X, y) @@ -48,7 +48,7 @@ def test_partial_dependence_classifier(): def test_partial_dependence_multiclass(): - """Test partial dependence for multi-class classifier """ + # Test partial dependence for multi-class classifier clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(iris.data, iris.target) @@ -63,7 +63,7 @@ def test_partial_dependence_multiclass(): def test_partial_dependence_regressor(): - """Test partial dependence for regressor """ + # Test partial dependence for regressor clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(boston.data, boston.target) @@ -76,7 +76,7 @@ def test_partial_dependence_regressor(): def test_partial_dependecy_input(): - """Test input validation of partial dependence. """ + # Test input validation of partial dependence. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(X, y) @@ -105,7 +105,7 @@ def test_partial_dependecy_input(): @if_matplotlib def test_plot_partial_dependence(): - """Test partial dependence plot function. """ + # Test partial dependence plot function. clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(boston.data, boston.target) @@ -137,7 +137,7 @@ def test_plot_partial_dependence(): @if_matplotlib def test_plot_partial_dependence_input(): - """Test partial dependence plot function input checks. """ + # Test partial dependence plot function input checks. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) # not fitted yet @@ -172,7 +172,7 @@ def test_plot_partial_dependence_input(): @if_matplotlib def test_plot_partial_dependence_multiclass(): - """Test partial dependence plot function on multi-class input. """ + # Test partial dependence plot function on multi-class input. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(iris.data, iris.target) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 608007623d81d..8a5a99dfa5d37 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -44,7 +44,7 @@ def test_classification_toy(): - """Check classification on a toy dataset.""" + # Check classification on a toy dataset. for alg in ['SAMME', 'SAMME.R']: clf = AdaBoostClassifier(algorithm=alg, random_state=0) clf.fit(X, y_class) @@ -55,14 +55,14 @@ def test_classification_toy(): def test_regression_toy(): - """Check classification on a toy dataset.""" + # Check regression on a toy dataset. clf = AdaBoostRegressor(random_state=0) clf.fit(X, y_regr) assert_array_equal(clf.predict(T), y_t_regr) def test_iris(): - """Check consistency on dataset iris.""" + # Check consistency on dataset iris. classes = np.unique(iris.target) clf_samme = prob_samme = None @@ -91,7 +91,7 @@ def test_iris(): def test_boston(): - """Check consistency on dataset boston house prices.""" + # Check consistency on dataset boston house prices. 
clf = AdaBoostRegressor(random_state=0) clf.fit(boston.data, boston.target) score = clf.score(boston.data, boston.target) @@ -99,7 +99,7 @@ def test_boston(): def test_staged_predict(): - """Check staged predictions.""" + # Check staged predictions. rng = np.random.RandomState(0) iris_weights = rng.randint(10, size=iris.target.shape) boston_weights = rng.randint(10, size=boston.target.shape) @@ -143,7 +143,7 @@ def test_staged_predict(): def test_gridsearch(): - """Check that base trees can be grid-searched.""" + # Check that base trees can be grid-searched. # AdaBoost classification boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier()) parameters = {'n_estimators': (1, 2), @@ -162,7 +162,7 @@ def test_gridsearch(): def test_pickle(): - """Check pickability.""" + # Check picklability. import pickle # Adaboost classifier @@ -190,7 +190,7 @@ def test_pickle(): def test_importances(): - """Check variable importances.""" + # Check variable importances. X, y = datasets.make_classification(n_samples=2000, n_features=10, n_informative=3, @@ -211,7 +211,7 @@ def test_importances(): def test_error(): - """Test that it gives proper exception on deficient input.""" + # Test that it gives proper exception on deficient input. assert_raises(ValueError, AdaBoostClassifier(learning_rate=-1).fit, X, y_class) @@ -226,7 +226,7 @@ def test_error(): def test_base_estimator(): - """Test different base estimators.""" + # Test different base estimators. from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC @@ -273,7 +273,7 @@ def test_sample_weight_missing(): def test_sparse_classification(): - """Check classification with sparse input.""" + # Check classification with sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set.""" @@ -371,7 +371,7 @@ def fit(self, X, y, sample_weight=None): def test_sparse_regression(): - """Check regression with sparse input.""" + # Check regression with sparse input. class CustomSVR(SVR): """SVR variant that records the nature of the training set.""" diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 2860eacb35640..c12919762aa14 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -76,13 +76,13 @@ def test_hasher_invalid_input(): def test_hasher_set_params(): - """Test delayed input validation in fit (useful for grid search).""" + # Test delayed input validation in fit (useful for grid search). hasher = FeatureHasher() hasher.set_params(n_features=np.inf) assert_raises(TypeError, hasher.fit) def test_hasher_zeros(): - """Assert that no zeros are materialized in the output.""" + # Assert that no zeros are materialized in the output. X = FeatureHasher().transform([{'foo': 0}]) assert_equal(X.data.shape, (0,)) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 5b67e5f5d7a3a..c1cac567ce4fd 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -561,7 +561,7 @@ def test_vectorizer_max_features(): def test_count_vectorizer_max_features(): - """Regression test: max_features didn't work correctly in 0.14.""" + # Regression test: max_features didn't work correctly in 0.14. 
cv_1 = CountVectorizer(max_features=1) cv_3 = CountVectorizer(max_features=3) @@ -860,8 +860,7 @@ def test_pickling_vectorizer(): def test_stop_words_removal(): - """Ensure that deleting the stop_words_ attribute doesn't affect transform - """ + # Ensure that deleting the stop_words_ attribute doesn't affect transform fitted_vectorizers = ( TfidfVectorizer().fit(JUNK_FOOD_DOCS), diff --git a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py index 06ffdff629a50..baaa4907bad5e 100644 --- a/sklearn/feature_selection/tests/test_chi2.py +++ b/sklearn/feature_selection/tests/test_chi2.py @@ -29,7 +29,7 @@ def mkchi2(k): def test_chi2(): - """Test Chi2 feature extraction""" + # Test Chi2 feature extraction chi2 = mkchi2(k=1).fit(X, y) chi2 = mkchi2(k=1).fit(X, y) @@ -52,24 +52,22 @@ def test_chi2(): def test_chi2_coo(): - """Check that chi2 works with a COO matrix - - (as returned by CountVectorizer, DictVectorizer) - """ + # Check that chi2 works with a COO matrix + # (as returned by CountVectorizer, DictVectorizer) Xcoo = coo_matrix(X) mkchi2(k=2).fit_transform(Xcoo, y) # if we got here without an exception, we're safe def test_chi2_negative(): - """Check for proper error on negative numbers in the input X.""" + # Check for proper error on negative numbers in the input X. X, y = [[0, 1], [-1e-20, 1]], [0, 1] for X in (X, np.array(X), csr_matrix(X)): assert_raises(ValueError, chi2, X, y) def test_chisquare(): - """Test replacement for scipy.stats.chisquare against the original.""" + # Test replacement for scipy.stats.chisquare against the original. obs = np.array([[2., 2.], [1., 1.]]) exp = np.array([[1.5, 1.5], diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 4b91b0b311b03..adc289888fa1f 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -34,7 +34,7 @@ # Test the score functions def test_f_oneway_vs_scipy_stats(): - """Test that our f_oneway gives the same result as scipy.stats""" + # Test that our f_oneway gives the same result as scipy.stats rng = np.random.RandomState(0) X1 = rng.randn(10, 3) X2 = 1 + rng.randn(10, 3) @@ -59,10 +59,8 @@ def test_f_oneway_ints(): def test_f_classif(): - """ - Test whether the F test yields meaningful results - on a simple simulated classification problem - """ + # Test whether the F test yields meaningful results + # on a simple simulated classification problem X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -81,10 +79,8 @@ def test_f_classif(): def test_f_regression(): - """ - Test whether the F test yields meaningful results - on a simple simulated regression problem - """ + # Test whether the F test yields meaningful results + # on a simple simulated regression problem X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -103,10 +99,8 @@ def test_f_regression(): def test_f_regression_input_dtype(): - """ - Test whether f_regression returns the same value - for any numeric data_type - """ + # Test whether f_regression returns the same value + # for any numeric data_type rng = np.random.RandomState(0) X = rng.rand(10, 20) y = np.arange(10).astype(np.int) @@ -118,11 +112,9 @@ def test_f_regression_input_dtype(): def test_f_regression_center(): - """Test whether f_regression preserves dof according to 'center' argument - - We use two 
centered variates so we have a simple relationship between - F-score with variates centering and F-score without variates centering. - """ + # Test whether f_regression preserves dof according to 'center' argument + # We use two centered variates so we have a simple relationship between + # F-score with variates centering and F-score without variates centering. # Create toy example X = np.arange(-5, 6).reshape(-1, 1) # X has zero mean n_samples = X.size @@ -137,10 +129,8 @@ def test_f_regression_center(): def test_f_classif_multi_class(): - """ - Test whether the F test yields meaningful results - on a simple simulated classification problem - """ + # Test whether the F test yields meaningful results + # on a simple simulated classification problem X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -156,11 +146,9 @@ def test_f_classif_multi_class(): def test_select_percentile_classif(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the percentile heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the percentile heuristic X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -179,11 +167,9 @@ def test_select_percentile_classif(): def test_select_percentile_classif_sparse(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the percentile heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the percentile heuristic X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -213,11 +199,9 @@ def test_select_percentile_classif_sparse(): # Test univariate selection in classification settings def test_select_kbest_classif(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the k best heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the k best heuristic X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -236,9 +220,7 @@ def test_select_kbest_classif(): def test_select_kbest_all(): - """ - Test whether k="all" correctly returns all features. - """ + # Test whether k="all" correctly returns all features. X, y = make_classification(n_samples=20, n_features=10, shuffle=False, random_state=0) @@ -248,9 +230,7 @@ def test_select_kbest_all(): def test_select_kbest_zero(): - """ - Test whether k=0 correctly returns no features. - """ + # Test whether k=0 correctly returns no features. 
X, y = make_classification(n_samples=20, n_features=10, shuffle=False, random_state=0) @@ -265,11 +245,9 @@ def test_select_kbest_zero(): def test_select_heuristics_classif(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the fdr, fwe and fpr heuristics - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the fdr, fwe and fpr heuristics X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -300,11 +278,9 @@ def assert_best_scores_kept(score_filter): def test_select_percentile_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the percentile heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the percentile heuristic X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -327,10 +303,8 @@ def test_select_percentile_regression(): def test_select_percentile_regression_full(): - """ - Test whether the relative univariate feature selection - selects all features when '100%' is asked. - """ + # Test whether the relative univariate feature selection + # selects all features when '100%' is requested. X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -358,11 +332,9 @@ def test_invalid_percentile(): def test_select_kbest_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the k best heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the k best heuristic X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0, noise=10) @@ -379,11 +351,9 @@ def test_select_kbest_regression(): def test_select_heuristics_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the fpr, fdr or fwe heuristics - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the fpr, fdr or fwe heuristics X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0, noise=10) @@ -401,9 +371,7 @@ def test_select_heuristics_regression(): def test_select_fdr_regression(): - """ - Test that fdr heuristic actually has low FDR. - """ + # Test that fdr heuristic actually has low FDR. 
def single_fdr(alpha, n_informative, random_state): X, y = make_regression(n_samples=150, n_features=20, n_informative=n_informative, shuffle=False, @@ -445,11 +413,9 @@ def single_fdr(alpha, n_informative, random_state): def test_select_fwe_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the fwe heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the fwe heuristic X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -466,10 +432,8 @@ def test_select_fwe_regression(): def test_selectkbest_tiebreaking(): - """Test whether SelectKBest actually selects k features in case of ties. - - Prior to 0.11, SelectKBest would return more features than requested. - """ + # Test whether SelectKBest actually selects k features in case of ties. + # Prior to 0.11, SelectKBest would return more features than requested. Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]] y = [1] dummy_score = lambda X, y: (X[0], X[0]) @@ -486,8 +450,7 @@ def test_selectkbest_tiebreaking(): def test_selectpercentile_tiebreaking(): - """Test if SelectPercentile selects the right n_features in case of ties. - """ + # Test if SelectPercentile selects the right n_features in case of ties. Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]] y = [1] dummy_score = lambda X, y: (X[0], X[0]) @@ -504,7 +467,7 @@ def test_selectpercentile_tiebreaking(): def test_tied_pvalues(): - """Test whether k-best and percentiles work with tied pvalues from chi2.""" + # Test whether k-best and percentiles work with tied pvalues from chi2. # chi2 will return the same p-values for the following features, but it # will return different scores. X0 = np.array([[10000, 9999, 9998], [1, 1, 1]]) @@ -522,7 +485,7 @@ def test_tied_pvalues(): def test_tied_scores(): - """Test for stable sorting in k-best with tied scores.""" + # Test for stable sorting in k-best with tied scores. X_train = np.array([[0, 0, 0], [1, 1, 1]]) y_train = [0, 1] @@ -533,7 +496,7 @@ def test_tied_scores(): def test_nans(): - """Assert that SelectKBest and SelectPercentile can handle NaNs.""" + # Assert that SelectKBest and SelectPercentile can handle NaNs. # First feature has zero variance to confuse f_classif (ANOVA) and # make it return a NaN. X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]] @@ -567,7 +530,7 @@ def test_invalid_k(): def test_f_classif_constant_feature(): - """Test that f_classif warns if a feature is constant throughout.""" + # Test that f_classif warns if a feature is constant throughout. X, y = make_classification(n_samples=10, n_features=5) X[:, 0] = 2.0 diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 10cae41f15e45..87af92d7fce15 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -11,7 +11,7 @@ def test_zero_variance(): - """Test VarianceThreshold with default setting, zero variance.""" + # Test VarianceThreshold with default setting, zero variance. for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]: sel = VarianceThreshold().fit(X) @@ -22,7 +22,7 @@ def test_zero_variance(): def test_variance_threshold(): - """Test VarianceThreshold with custom variance.""" + # Test VarianceThreshold with custom variance. 
for X in [data, csr_matrix(data)]: X = VarianceThreshold(threshold=.4).fit_transform(X) assert_equal((len(data), 1), X.shape) diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py index a3c09f4664891..36c041acbbc7f 100644 --- a/sklearn/gaussian_process/tests/test_gaussian_process.py +++ b/sklearn/gaussian_process/tests/test_gaussian_process.py @@ -25,12 +25,9 @@ def test_1d(regr=regression.constant, corr=correlation.squared_exponential, random_start=10, beta0=None): - """ - MLE estimation of a one-dimensional Gaussian Process model. - Check random start optimization. - - Test the interpolating property. - """ + # MLE estimation of a one-dimensional Gaussian Process model. + # Check random start optimization. + # Test the interpolating property. gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0, theta0=1e-2, thetaL=1e-4, thetaU=1e-1, random_start=random_start, verbose=False).fit(X, y) @@ -43,12 +40,9 @@ def test_1d(regr=regression.constant, corr=correlation.squared_exponential, def test_2d(regr=regression.constant, corr=correlation.squared_exponential, random_start=10, beta0=None): - """ - MLE estimation of a two-dimensional Gaussian Process model accounting for - anisotropy. Check random start optimization. - - Test the interpolating property. - """ + # MLE estimation of a two-dimensional Gaussian Process model accounting for + # anisotropy. Check random start optimization. + # Test the interpolating property. b, kappa, e = 5., .5, .1 g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2. X = np.array([[-4.61611719, -6.00099547], @@ -78,12 +72,9 @@ def test_2d(regr=regression.constant, corr=correlation.squared_exponential, def test_2d_2d(regr=regression.constant, corr=correlation.squared_exponential, random_start=10, beta0=None): - """ - MLE estimation of a two-dimensional Gaussian Process model accounting for - anisotropy. Check random start optimization. - - Test the GP interpolation for 2D output - """ + # MLE estimation of a two-dimensional Gaussian Process model accounting for + # anisotropy. Check random start optimization. + # Test the GP interpolation for 2D output b, kappa, e = 5., .5, .1 g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2. f = lambda x: np.vstack((g(x), g(x))).T @@ -113,10 +104,8 @@ def test_wrong_number_of_outputs(): def test_more_builtin_correlation_models(random_start=1): - """ - Repeat test_1d and test_2d for several built-in correlation - models specified as strings. - """ + # Repeat test_1d and test_2d for several built-in correlation + # models specified as strings. all_corr = ['absolute_exponential', 'squared_exponential', 'cubic', 'linear'] @@ -127,10 +116,8 @@ def test_more_builtin_correlation_models(random_start=1): def test_ordinary_kriging(): - """ - Repeat test_1d and test_2d with given regression weights (beta0) for - different regression models (Ordinary Kriging). - """ + # Repeat test_1d and test_2d with given regression weights (beta0) for + # different regression models (Ordinary Kriging). test_1d(regr='linear', beta0=[0., 0.5]) test_1d(regr='quadratic', beta0=[0., 0.5, 0.5]) test_2d(regr='linear', beta0=[0., 0.5, 0.5]) @@ -146,10 +133,8 @@ def test_no_normalize(): def test_random_starts(): - """ - Test that an increasing number of random-starts of GP fitting only - increases the reduced likelihood function of the optimal theta. 
- """ + # Test that an increasing number of random-starts of GP fitting only + # increases the reduced likelihood function of the optimal theta. n_samples, n_features = 50, 3 np.random.seed(0) rng = np.random.RandomState(0) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index cf04c8f205cee..6fd9ef74ccc0d 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -17,9 +17,7 @@ def test_linear_regression(): - """ - Test LinearRegression on a simple dataset. - """ + # Test LinearRegression on a simple dataset. # a simple dataset X = [[1], [2]] Y = [1, 2] @@ -43,9 +41,7 @@ def test_linear_regression(): def test_fit_intercept(): - """ - Test assertions on betas shape. - """ + # Test assertions on betas shape. X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]]) X3 = np.array([[0.27677969, 0.70693172, 0.01628859], @@ -253,7 +249,7 @@ def test_sparse_center_data(): def test_csr_sparse_center_data(): - """Test output format of sparse_center_data, when input is csr""" + # Test output format of sparse_center_data, when input is csr X, y = make_regression() X[X < 2.5] = 0.0 csr = sparse.csr_matrix(X) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 100d50f6fcbf1..3bb09b1a44933 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -14,9 +14,7 @@ def test_bayesian_on_diabetes(): - """ - Test BayesianRidge on diabetes - """ + # Test BayesianRidge on diabetes raise SkipTest("XFailed Test") diabetes = datasets.load_diabetes() X, y = diabetes.data, diabetes.target @@ -37,9 +35,7 @@ def test_bayesian_on_diabetes(): def test_toy_bayesian_ridge_object(): - """ - Test BayesianRidge on toy - """ + # Test BayesianRidge on toy X = np.array([[1], [2], [6], [8], [10]]) Y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(compute_score=True) @@ -51,9 +47,7 @@ def test_toy_bayesian_ridge_object(): def test_toy_ard_object(): - """ - Test BayesianRegression ARD classifier - """ + # Test BayesianRegression ARD classifier X = np.array([[1], [2], [3]]) Y = np.array([1, 2, 3]) clf = ARDRegression(compute_score=True) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index cada4377d1807..21dcd1262ab60 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -33,7 +33,7 @@ def check_warnings(): def test_lasso_zero(): - """Check that the lasso can handle zero data without crashing""" + # Check that the lasso can handle zero data without crashing X = [[0], [0], [0]] y = [0, 0, 0] clf = Lasso(alpha=0.1).fit(X, y) @@ -44,12 +44,9 @@ def test_lasso_zero(): def test_lasso_toy(): - """ - Test Lasso on a toy example for various values of alpha. - - When validating this against glmnet notice that glmnet divides it - against nobs. - """ + # Test Lasso on a toy example for various values of alpha. + # When validating this against glmnet notice that glmnet divides it + # against nobs. X = [[-1], [0], [1]] Y = [-1, 0, 1] # just a straight line @@ -85,14 +82,10 @@ def test_lasso_toy(): def test_enet_toy(): - """ - Test ElasticNet for various parameters of alpha and l1_ratio. - - Actually, the parameters alpha = 0 should not be allowed. However, - we test it as a border case. 
- - ElasticNet is tested with and without precomputed Gram matrix - """ + # Test ElasticNet for various parameters of alpha and l1_ratio. + # Actually, the parameter alpha = 0 should not be allowed. However, + # we test it as a border case. + # ElasticNet is tested with and without precomputed Gram matrix X = np.array([[-1.], [0.], [1.]]) Y = [-1, 0, 1] # just a straight line @@ -540,11 +533,9 @@ def test_warm_start_convergence_with_regularizer_decrement(): def test_random_descent(): - """Test that both random and cyclic selection give the same results. - - Ensure that the test models fully converge and check a wide - range of conditions. - """ + # Test that both random and cyclic selection give the same results. + # Ensure that the test models fully converge and check a wide + # range of conditions. # This uses the coordinate descent algo using the gram trick. X, y, _, _ = build_dataset(n_samples=50, n_features=20) @@ -587,9 +578,7 @@ def test_random_descent(): def test_deprection_precompute_enet(): - """ - Test that setting precompute="auto" gives a Deprecation Warning. - """ + # Test that setting precompute="auto" gives a Deprecation Warning. X, y, _, _ = build_dataset(n_samples=20, n_features=10) clf = ElasticNet(precompute="auto") @@ -599,9 +588,7 @@ def test_deprection_precompute_enet(): def test_enet_path_positive(): - """ - Test that the coefs returned by positive=True in enet_path are positive - """ + # Test that the coefs returned by positive=True in enet_path are positive X, y, _, _ = build_dataset(n_samples=50, n_features=50) for path in [enet_path, lasso_path]: @@ -610,9 +597,7 @@ def test_enet_path_positive(): def test_sparse_dense_descent_paths(): - """ - Test that dense and sparse input give the same input for descent paths. - """ + # Test that dense and sparse input give the same output for descent paths. X, y, _, _ = build_dataset(n_samples=50, n_features=20) csr = sparse.csr_matrix(X) for path in [enet_path, lasso_path]: diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index d7e49d5d25dea..35e446a68c3ac 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -20,9 +20,7 @@ def test_simple(): - """ - Principle of Lars is to keep covariances tied and decreasing - """ + # Principle of Lars is to keep covariances tied and decreasing # also test verbose output from sklearn.externals.six.moves import cStringIO as StringIO @@ -52,9 +50,7 @@ def test_simple(): def test_simple_precomputed(): - """ - The same, with precomputed Gram matrix - """ + # The same, with precomputed Gram matrix G = np.dot(diabetes.data.T, diabetes.data) alphas_, active, coef_path_ = linear_model.lars_path( @@ -74,9 +70,7 @@ def test_simple_precomputed(): def test_all_precomputed(): - """ - Test that lars_path with precomputed Gram and Xy gives the right answer - """ + # Test that lars_path with precomputed Gram and Xy gives the right answer X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) @@ -88,10 +82,8 @@ def test_all_precomputed(): def test_lars_lstsq(): - """ - Test that Lars gives least square solution at the end - of the path - """ + # Test that Lars gives least square solution at the end + # of the path X1 = 3 * diabetes.data # use un-normalized dataset clf = linear_model.LassoLars(alpha=0.) 
clf.fit(X1, y) @@ -100,17 +92,15 @@ def test_lars_lstsq(): def test_lasso_gives_lstsq_solution(): - """ - Test that Lars Lasso gives least square solution at the end - of the path - """ + # Test that Lars Lasso gives least square solution at the end + # of the path alphas_, active, coef_path_ = linear_model.lars_path(X, y, method="lasso") coef_lstsq = np.linalg.lstsq(X, y)[0] assert_array_almost_equal(coef_lstsq, coef_path_[:, -1]) def test_collinearity(): - """Check that lars_path is robust to collinearity in input""" + # Check that lars_path is robust to collinearity in input X = np.array([[3., 3., 1.], [2., 2., 0.], [1., 1., 0]]) @@ -133,9 +123,7 @@ def test_collinearity(): def test_no_path(): - """ - Test that the ``return_path=False`` option returns the correct output - """ + # Test that the ``return_path=False`` option returns the correct output alphas_, active_, coef_path_ = linear_model.lars_path( diabetes.data, diabetes.target, method="lar") @@ -147,9 +135,7 @@ def test_no_path(): def test_no_path_precomputed(): - """ - Test that the ``return_path=False`` option with Gram remains correct - """ + # Test that the ``return_path=False`` option with Gram remains correct G = np.dot(diabetes.data.T, diabetes.data) @@ -164,9 +150,7 @@ def test_no_path_precomputed(): def test_no_path_all_precomputed(): - """ - Test that the ``return_path=False`` option with Gram and Xy remains correct - """ + # Test that the ``return_path=False`` option with Gram and Xy remains correct X, y = 3 * diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) @@ -217,10 +201,8 @@ def test_rank_deficient_design(): def test_lasso_lars_vs_lasso_cd(verbose=False): - """ - Test that LassoLars and Lasso using coordinate descent give the - same results. - """ + # Test that LassoLars and Lasso using coordinate descent give the + # same results. X = 3 * diabetes.data alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') @@ -256,11 +238,9 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): - """ - Test that LassoLars and Lasso using coordinate descent give the - same results when early stopping is used. - (test : before, in the middle, and in the last part of the path) - """ + # Test that LassoLars and Lasso using coordinate descent give the + # same results when early stopping is used. + # (test : before, in the middle, and in the last part of the path) alphas_min = [10, 0.9, 1e-4] for alphas_min in alphas_min: alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', @@ -358,11 +338,8 @@ def objective_function(coef): def test_lars_add_features(): - """ - assure that at least some features get added if necessary - - test for 6d2b4c - """ + # assure that at least some features get added if necessary + # test for 6d2b4c # Hilbert matrix n = 5 H = 1. / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis]) @@ -381,9 +358,7 @@ def test_lars_n_nonzero_coefs(verbose=False): def test_multitarget(): - """ - Assure that estimators receiving multidimensional y do the right thing - """ + # Assure that estimators receiving multidimensional y do the right thing X = diabetes.data Y = np.vstack([diabetes.target, diabetes.target ** 2]).T n_targets = Y.shape[1] @@ -406,12 +381,10 @@ def test_multitarget(): def test_lars_cv(): - """ Test the LassoLarsCV object by checking that the optimal alpha - increases as the number of samples increases. 
- - This property is not actually garantied in general and is just a - property of the given dataset, with the given steps chosen. - """ + # Test the LassoLarsCV object by checking that the optimal alpha + # increases as the number of samples increases. + # This property is not actually guaranteed in general and is just a + # property of the given dataset, with the given steps chosen. old_alpha = 0 lars_cv = linear_model.LassoLarsCV() for length in (400, 200, 100): @@ -423,11 +396,10 @@ def test_lars_cv(): def test_lasso_lars_ic(): - """ Test the LassoLarsIC object by checking that - - some good features are selected. - - alpha_bic > alpha_aic - - n_nonzero_bic < n_nonzero_aic - """ + # Test the LassoLarsIC object by checking that + # - some good features are selected. + # - alpha_bic > alpha_aic + # - n_nonzero_bic < n_nonzero_aic lars_bic = linear_model.LassoLarsIC('bic') lars_aic = linear_model.LassoLarsIC('aic') rng = np.random.RandomState(42) @@ -448,7 +420,7 @@ def test_lasso_lars_ic(): def test_no_warning_for_zero_mse(): - """LassoLarsIC should not warn for log of zero MSE.""" + # LassoLarsIC should not warn for log of zero MSE. y = np.arange(10, dtype=float) X = y.reshape(-1, 1) lars = linear_model.LassoLarsIC(normalize=False) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 40a5d3e000fd5..88fcc68f48256 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -51,10 +51,8 @@ def check_predictions(clf, X, y): def test_predict_2_classes(): - """Simple sanity check on a 2 classes dataset - - Make sure it predicts the correct result on simple datasets. - """ + # Simple sanity check on a 2 classes dataset + # Make sure it predicts the correct result on simple datasets. check_predictions(LogisticRegression(random_state=0), X, Y1) check_predictions(LogisticRegression(random_state=0), X_sp, Y1) @@ -68,7 +66,7 @@ def test_predict_2_classes(): def test_error(): - """Test for appropriate exception on errors""" + # Test for appropriate exception on errors assert_raises(ValueError, LogisticRegression(C=-1).fit, X, Y1) @@ -78,7 +76,7 @@ def test_predict_3_classes(): def test_predict_iris(): - """Test logistic regression with the iris dataset""" + # Test logistic regression with the iris dataset n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] @@ -112,7 +110,7 @@ def test_multinomial_validation(): def test_multinomial_binary(): - """Test multinomial LR on a binary problem.""" + # Test multinomial LR on a binary problem. target = (iris.target > 0).astype(np.intp) target = np.array(["setosa", "not-setosa"])[target] @@ -133,7 +131,7 @@ def test_multinomial_binary(): def test_sparsify(): - """Test sparsify and densify members.""" + # Test sparsify and densify members. 
n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] clf = LogisticRegression(random_state=0).fit(iris.data, target) @@ -156,7 +154,7 @@ def test_sparsify(): def test_inconsistent_input(): - """Test that an exception is raised on inconsistent input""" + # Test that an exception is raised on inconsistent input rng = np.random.RandomState(0) X_ = rng.random_sample((5, 10)) y_ = np.ones(X_.shape[0]) @@ -174,7 +172,7 @@ def test_inconsistent_input(): def test_write_parameters(): - """Test that we can write to coef_ and intercept_""" + # Test that we can write to coef_ and intercept_ clf = LogisticRegression(random_state=0) clf.fit(X, Y1) clf.coef_[:] = 0 @@ -184,17 +182,15 @@ def test_write_parameters(): @raises(ValueError) def test_nan(): - """Test proper NaN handling. - - Regression test for Issue #252: fit used to go into an infinite loop. - """ + # Test proper NaN handling. + # Regression test for Issue #252: fit used to go into an infinite loop. Xnan = np.array(X, dtype=np.float64) Xnan[0, 1] = np.nan LogisticRegression(random_state=0).fit(Xnan, Y1) def test_consistency_path(): - """Test that the path algorithm is consistent""" + # Test that the path algorithm is consistent rng = np.random.RandomState(0) X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2))) y = [1] * 100 + [-1] * 100 @@ -315,7 +311,7 @@ def test_logistic_loss_grad_hess(): def test_logistic_cv(): - """test for LogisticRegressionCV object""" + # test for LogisticRegressionCV object n_samples, n_features = 50, 5 rng = np.random.RandomState(0) X_ref = rng.randn(n_samples, n_features) @@ -388,7 +384,7 @@ def test_intercept_logistic_helper(): def test_ovr_multinomial_iris(): - """Test that OvR and multinomial are correct using the iris dataset.""" + # Test that OvR and multinomial are correct using the iris dataset. train, target = iris.data, iris.target n_samples, n_features = train.shape @@ -491,7 +487,7 @@ def test_logistic_regressioncv_class_weights(): def test_logistic_regression_convergence_warnings(): - """Test that warnings are raised if model does not converge""" + # Test that warnings are raised if model does not converge X, y = make_classification(n_samples=20, n_features=20) clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1) @@ -500,7 +496,7 @@ def test_logistic_regression_convergence_warnings(): def test_logistic_regression_multinomial(): - """Tests for the multinomial option in logistic regression""" + # Tests for the multinomial option in logistic regression # Some basic attributes of Logistic Regression n_samples, n_features, n_classes = 50, 20, 3 @@ -576,13 +572,11 @@ def test_multinomial_loss_grad_hess(): def test_liblinear_decision_function_zero(): - """Test negative prediction when decision_function values are zero. - - Liblinear predicts the positive class when decision_function values - are zero. This is a test to verify that we do not do the same. - See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600 - and the PR https://github.com/scikit-learn/scikit-learn/pull/3623 - """ + # Test negative prediction when decision_function values are zero. + # Liblinear predicts the positive class when decision_function values + # are zero. This is a test to verify that we do not do the same. 
+ # See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600 + # and the PR https://github.com/scikit-learn/scikit-learn/pull/3623 X, y = make_classification(n_samples=5, n_features=5) clf = LogisticRegression(fit_intercept=False) clf.fit(X, y) @@ -593,7 +587,7 @@ def test_liblinear_decision_function_zero(): def test_liblinear_logregcv_sparse(): - """Test LogRegCV with solver='liblinear' works for sparse matrices""" + # Test LogRegCV with solver='liblinear' works for sparse matrices X, y = make_classification(n_samples=10, n_features=5) clf = LogisticRegressionCV(solver='liblinear') diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index c62cec838af87..55d4fb23cfadb 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -89,7 +89,7 @@ def test_classifier_partial_fit(): def test_classifier_refit(): - """Classifier can be retrained on different labels and features.""" + # Classifier can be retrained on different labels and features. clf = PassiveAggressiveClassifier().fit(X, y) assert_array_equal(clf.classes_, np.unique(y)) diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py index 3f202b3bf4000..9870eba987625 100644 --- a/sklearn/linear_model/tests/test_randomized_l1.py +++ b/sklearn/linear_model/tests/test_randomized_l1.py @@ -27,7 +27,7 @@ def test_lasso_stability_path(): - """Check lasso stability path""" + # Check lasso stability path # Load diabetes data and add noisy features scaling = 0.3 coef_grid, scores_path = lasso_stability_path(X, y, scaling=scaling, @@ -39,7 +39,7 @@ def test_lasso_stability_path(): def test_randomized_lasso(): - """Check randomized lasso""" + # Check randomized lasso scaling = 0.3 selection_threshold = 0.5 @@ -76,7 +76,7 @@ def test_randomized_lasso(): def test_randomized_logistic(): - """Check randomized sparse logistic regression""" + # Check randomized sparse logistic regression iris = load_iris() X = iris.data[:, [0, 2]] y = iris.target @@ -102,7 +102,7 @@ def test_randomized_logistic(): def test_randomized_logistic_sparse(): - """Check randomized sparse logistic regression on sparse data""" + # Check randomized sparse logistic regression on sparse data iris = load_iris() X = iris.data[:, [0, 2]] y = iris.target diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 677fa88b70511..0b1ac2487745d 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -48,11 +48,9 @@ def test_ridge(): - """Ridge regression convergence test using score - - TODO: for this test to be robust, we should use a dataset instead - of np.random. - """ + # Ridge regression convergence test using score + # TODO: for this test to be robust, we should use a dataset instead + # of np.random. 
rng = np.random.RandomState(0) alpha = 1.0 @@ -160,8 +158,7 @@ def test_ridge_sample_weights(): def test_ridge_shapes(): - """Test shape of coef_ and intercept_ - """ + # Test shape of coef_ and intercept_ rng = np.random.RandomState(0) n_samples, n_features = 5, 10 X = rng.randn(n_samples, n_features) @@ -185,8 +182,7 @@ def test_ridge_shapes(): def test_ridge_intercept(): - """Test intercept with multiple targets GH issue #708 - """ + # Test intercept with multiple targets GH issue #708 rng = np.random.RandomState(0) n_samples, n_features = 5, 10 X = rng.randn(n_samples, n_features) @@ -204,10 +200,8 @@ def test_ridge_intercept(): def test_toy_ridge_object(): - """Test BayesianRegression ridge classifier - - TODO: test also n_samples > n_features - """ + # Test BayesianRegression ridge classifier + # TODO: test also n_samples > n_features X = np.array([[1], [2]]) Y = np.array([1, 2]) clf = Ridge(alpha=0.0) @@ -228,7 +222,7 @@ def test_toy_ridge_object(): def test_ridge_vs_lstsq(): - """On alpha=0., Ridge and OLS yield the same solution.""" + # On alpha=0., Ridge and OLS yield the same solution. rng = np.random.RandomState(0) # we need more samples than features @@ -249,7 +243,7 @@ def test_ridge_vs_lstsq(): def test_ridge_individual_penalties(): - """Tests the ridge object using individual penalties""" + # Tests the ridge object using individual penalties rng = np.random.RandomState(42) @@ -458,9 +452,7 @@ def test_ridge_sparse_svd(): def test_class_weights(): - """ - Test class weights. - """ + # Test class weights. X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] @@ -496,9 +488,7 @@ def test_class_weights(): def test_class_weights_cv(): - """ - Test class weights for cross validated ridge classifier. - """ + # Test class weights for cross validated ridge classifier. X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] @@ -514,9 +504,7 @@ def test_class_weights_cv(): def test_ridgecv_store_cv_values(): - """ - Test _RidgeCV's store_cv_values attribute. - """ + # Test _RidgeCV's store_cv_values attribute. rng = rng = np.random.RandomState(42) n_samples = 8 @@ -540,7 +528,7 @@ def test_ridgecv_store_cv_values(): def test_raises_value_error_if_sample_weights_greater_than_1d(): - """Sample weights must be either scalar or 1D""" + # Sample weights must be either scalar or 1D n_sampless = [2, 3] n_featuress = [3, 2] @@ -579,7 +567,7 @@ def fit_ridge_not_ok_2(): def test_sparse_design_with_sample_weights(): - """Sample weights must work with sparse matrices""" + # Sample weights must work with sparse matrices n_sampless = [2, 3] n_featuress = [3, 2] @@ -610,8 +598,8 @@ def test_sparse_design_with_sample_weights(): def test_raises_value_error_if_solver_not_supported(): - """Tests whether a ValueError is raised if a non-identified solver - is passed to ridge_regression""" + # Tests whether a ValueError is raised if a non-identified solver + # is passed to ridge_regression wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)" diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index a5fd6b3c03dd7..0e7d9b4c9952c 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -174,7 +174,7 @@ def test_warm_start_optimal(self): self._test_warm_start(X, Y, "optimal") def test_input_format(self): - """Input format tests. """ + # Input format tests. 
clf = self.factory(alpha=0.01, n_iter=5, shuffle=False) clf.fit(X, Y) @@ -184,7 +184,7 @@ def test_input_format(self): assert_raises(ValueError, clf.fit, X, Y_) def test_clone(self): - """Test whether clone works ok. """ + # Test whether clone works ok. clf = self.factory(alpha=0.01, n_iter=5, penalty='l1') clf = clone(clf) clf.set_params(penalty='l2') @@ -257,9 +257,8 @@ def test_late_onset_averaging_reached(self): class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): """Test suite for the dense representation variant of SGD""" factory_class = SGDClassifier - def test_sgd(self): - """Check that SGD gives any results :-)""" + # Check that SGD gives any results :-) for loss in ("hinge", "squared_hinge", "log", "modified_huber"): clf = self.factory(penalty='l2', alpha=0.01, fit_intercept=True, @@ -270,68 +269,68 @@ def test_sgd(self): @raises(ValueError) def test_sgd_bad_l1_ratio(self): - """Check whether expected ValueError on bad l1_ratio""" + # Check whether expected ValueError on bad l1_ratio self.factory(l1_ratio=1.1) @raises(ValueError) def test_sgd_bad_learning_rate_schedule(self): - """Check whether expected ValueError on bad learning_rate""" + # Check whether expected ValueError on bad learning_rate self.factory(learning_rate="") @raises(ValueError) def test_sgd_bad_eta0(self): - """Check whether expected ValueError on bad eta0""" + # Check whether expected ValueError on bad eta0 self.factory(eta0=0, learning_rate="constant") @raises(ValueError) def test_sgd_bad_alpha(self): - """Check whether expected ValueError on bad alpha""" + # Check whether expected ValueError on bad alpha self.factory(alpha=-.1) @raises(ValueError) def test_sgd_bad_penalty(self): - """Check whether expected ValueError on bad penalty""" + # Check whether expected ValueError on bad penalty self.factory(penalty='foobar', l1_ratio=0.85) @raises(ValueError) def test_sgd_bad_loss(self): - """Check whether expected ValueError on bad loss""" + # Check whether expected ValueError on bad loss self.factory(loss="foobar") @raises(ValueError) def test_sgd_n_iter_param(self): - """Test parameter validity check""" + # Test parameter validity check self.factory(n_iter=-10000) @raises(ValueError) def test_sgd_shuffle_param(self): - """Test parameter validity check""" + # Test parameter validity check self.factory(shuffle="false") @raises(TypeError) def test_argument_coef(self): - """Checks coef_init not allowed as model argument (only fit)""" + # Checks coef_init not allowed as model argument (only fit) # Provided coef_ does not match dataset. self.factory(coef_init=np.zeros((3,))).fit(X, Y) @raises(ValueError) def test_provide_coef(self): - """Checks coef_init shape for the warm starts""" + # Checks coef_init shape for the warm starts # Provided coef_ does not match dataset. self.factory().fit(X, Y, coef_init=np.zeros((3,))) @raises(ValueError) def test_set_intercept(self): - """Checks intercept_ shape for the warm starts""" + # Checks intercept_ shape for the warm starts # Provided intercept_ does not match dataset. self.factory().fit(X, Y, intercept_init=np.zeros((3,))) def test_set_intercept_binary(self): - """Checks intercept_ shape for the warm starts in binary case""" + # Checks intercept_ shape for the warm starts in binary case self.factory().fit(X5, Y5, intercept_init=0) def test_average_binary_computed_correctly(self): - """Checks the SGDClassifier correctly computes the average weights""" + # Checks the SGDClassifier correctly computes the average weights eta = .1 alpha = 2. 
n_samples = 20 @@ -360,7 +359,7 @@ def test_average_binary_computed_correctly(self): assert_almost_equal(clf.intercept_, average_intercept, decimal=14) def test_set_intercept_to_intercept(self): - """Checks intercept_ shape consistency for the warm starts""" + # Checks intercept_ shape consistency for the warm starts # Inconsistent intercept_ shape. clf = self.factory().fit(X5, Y5) self.factory().fit(X5, Y5, intercept_init=clf.intercept_) @@ -369,11 +368,11 @@ def test_set_intercept_to_intercept(self): @raises(ValueError) def test_sgd_at_least_two_labels(self): - """Target must have at least two labels""" + # Target must have at least two labels self.factory(alpha=0.01, n_iter=20).fit(X2, np.ones(9)) def test_partial_fit_weight_class_auto(self): - """partial_fit with class_weight='auto' not supported""" + # partial_fit with class_weight='auto' not supported assert_raises_regexp(ValueError, "class_weight 'auto' is not supported for " "partial_fit. In order to use 'auto' weights, " @@ -387,7 +386,7 @@ def test_partial_fit_weight_class_auto(self): X, Y, classes=np.unique(Y)) def test_sgd_multiclass(self): - """Multi-class test case""" + # Multi-class test case clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) @@ -398,7 +397,7 @@ def test_sgd_multiclass(self): def test_sgd_multiclass_average(self): eta = .001 alpha = .01 - """Multi-class average test case""" + # Multi-class average test case clf = self.factory(loss='squared_loss', learning_rate='constant', eta0=eta, alpha=alpha, @@ -419,7 +418,7 @@ def test_sgd_multiclass_average(self): def test_sgd_multiclass_with_init_coef(self): - """Multi-class test case""" + # Multi-class test case clf = self.factory(alpha=0.01, n_iter=20) clf.fit(X2, Y2, coef_init=np.zeros((3, 2)), intercept_init=np.zeros(3)) @@ -429,7 +428,7 @@ def test_sgd_multiclass_with_init_coef(self): assert_array_equal(pred, true_result2) def test_sgd_multiclass_njobs(self): - """Multi-class test case with multi-core support""" + # Multi-class test case with multi-core support clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) @@ -438,8 +437,8 @@ def test_sgd_multiclass_njobs(self): def test_set_coef_multiclass(self): - """Checks coef_init and intercept_init shape for for multi-class - problems""" + # Checks coef_init and intercept_init shape for multi-class + # problems # Provided coef_ does not match dataset clf = self.factory() assert_raises(ValueError, clf.fit, X2, Y2, coef_init=np.zeros((2, 2))) @@ -456,7 +455,7 @@ def test_set_coef_multiclass(self): clf = self.factory().fit(X2, Y2, intercept_init=np.zeros((3,))) def test_sgd_proba(self): - """Check SGD.predict_proba""" + # Check SGD.predict_proba # Hinge loss does not allow for conditional prob estimate. # We cannot use the factory here, because it defines predict_proba @@ -523,7 +522,7 @@ def test_sgd_proba(self): assert_array_almost_equal(p[0], [1 / 3.] * 3) def test_sgd_l1(self): - """Test L1 regularization""" + # Test L1 regularization n = len(X4) rng = np.random.RandomState(13) idx = np.arange(n) @@ -552,9 +551,7 @@ def test_sgd_l1(self): assert_array_equal(pred, Y) def test_class_weights(self): - """ - Test class weights. - """ + # Test class weights. 
        X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                      [1.0, 1.0], [1.0, 0.0]])
        y = [1, 1, 1, -1, -1]
@@ -574,7 +571,7 @@ def test_class_weights(self):
         assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))

     def test_equal_class_weight(self):
-        """Test if equal class weights approx. equals no class weights. """
+        # Test if equal class weights approx. equal no class weights.
         X = [[1, 0], [1, 0], [0, 1], [0, 1]]
         y = [0, 0, 1, 1]
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight=None)
@@ -591,18 +588,18 @@ def test_equal_class_weight(self):

     @raises(ValueError)
     def test_wrong_class_weight_label(self):
-        """ValueError due to not existing class label."""
+        # ValueError due to a non-existing class label.
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight={0: 0.5})
         clf.fit(X, Y)

     @raises(ValueError)
     def test_wrong_class_weight_format(self):
-        """ValueError due to wrong class_weight argument type."""
+        # ValueError due to wrong class_weight argument type.
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight=[0.5])
         clf.fit(X, Y)

     def test_weights_multiplied(self):
-        """Tests that class_weight and sample_weight are multiplicative"""
+        # Tests that class_weight and sample_weight are multiplicative
         class_weights = {1: .6, 2: .3}
         sample_weights = np.random.random(Y4.shape[0])
         multiplied_together = np.copy(sample_weights)
@@ -618,7 +615,7 @@ def test_weights_multiplied(self):
         assert_almost_equal(clf1.coef_, clf2.coef_)

     def test_auto_weight(self):
-        """Test class weights for imbalanced data"""
+        # Test class weights for imbalanced data
         # compute reference metrics on iris dataset that is quite balanced by
         # default
         X, y = iris.data, iris.target
@@ -669,7 +666,7 @@ def test_auto_weight(self):
         assert_greater(metrics.f1_score(y, y_pred, average='weighted'), 0.96)

     def test_sample_weights(self):
-        """Test weights on individual samples"""
+        # Test weights on individual samples
         X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                       [1.0, 1.0], [1.0, 0.0]])
         y = [1, 1, 1, -1, -1]
@@ -687,7 +684,7 @@ def test_sample_weights(self):

     @raises(ValueError)
     def test_wrong_sample_weights(self):
-        """Test if ValueError is raised if sample_weight has wrong shape"""
+        # Test if a ValueError is raised if sample_weight has the wrong shape
         clf = self.factory(alpha=0.1, n_iter=1000, fit_intercept=False)
         # provided sample_weight too long
         clf.fit(X, Y, sample_weight=np.arange(7))
@@ -734,11 +731,9 @@ def test_partial_fit_multiclass(self):
         assert_true(id1, id2)

     def test_fit_then_partial_fit(self):
-        """Partial_fit should work after initial fit in the multiclass case.
-
-        Non-regression test for #2496; fit would previously produce a
-        Fortran-ordered coef_ that subsequent partial_fit couldn't handle.
-        """
+        # Partial_fit should work after initial fit in the multiclass case.
+        # Non-regression test for #2496; fit would previously produce a
+        # Fortran-ordered coef_ that subsequent partial_fit couldn't handle.
         clf = self.factory()
         clf.fit(X2, Y2)
         clf.partial_fit(X2, Y2)  # no exception here
@@ -794,7 +789,7 @@ def test_warm_start_multiclass(self):
         self._test_warm_start(X2, Y2, "optimal")

     def test_multiple_fit(self):
-        """Test multiple calls of fit w/ different shaped inputs."""
+        # Test multiple calls of fit w/ different shaped inputs.
         clf = self.factory(alpha=0.01, n_iter=5, shuffle=False)
         clf.fit(X, Y)
@@ -820,7 +815,7 @@ class DenseSGDRegressorTestCase(unittest.TestCase, CommonTest):
     factory_class = SGDRegressor

     def test_sgd(self):
-        """Check that SGD gives any results."""
+        # Check that SGD gives any results.
        clf = self.factory(alpha=0.1, n_iter=2, fit_intercept=False)
        clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
@@ -828,16 +823,16 @@ def test_sgd(self):

     @raises(ValueError)
     def test_sgd_bad_penalty(self):
-        """Check whether expected ValueError on bad penalty"""
+        # Check that a ValueError is raised on bad penalty
         self.factory(penalty='foobar', l1_ratio=0.85)

     @raises(ValueError)
     def test_sgd_bad_loss(self):
-        """Check whether expected ValueError on bad loss"""
+        # Check that a ValueError is raised on bad loss
         self.factory(loss="foobar")

     def test_sgd_averaged_computed_correctly(self):
-        """Tests the average regressor matches the naive implementation"""
+        # Tests that the average regressor matches the naive implementation
         eta = .001
         alpha = .01
@@ -865,7 +860,7 @@ def test_sgd_averaged_computed_correctly(self):
         assert_almost_equal(clf.intercept_, average_intercept, decimal=16)

     def test_sgd_averaged_partial_fit(self):
-        """Tests whether the partial fit yields the same average as the fit"""
+        # Tests whether the partial fit yields the same average as the fit
         eta = .001
         alpha = .01
         n_samples = 20
@@ -893,7 +888,7 @@ def test_sgd_averaged_partial_fit(self):
         assert_almost_equal(clf.intercept_[0], average_intercept, decimal=16)

     def test_average_sparse(self):
-        """Checks the average weights on data with 0s"""
+        # Checks the average weights on data with 0s
         eta = .001
         alpha = .01
@@ -989,7 +984,7 @@ def test_sgd_huber_fit(self):
         assert_greater(score, 0.5)

     def test_elasticnet_convergence(self):
-        """Check that the SGD output is consistent with coordinate descent"""
+        # Check that the SGD output is consistent with coordinate descent
         n_samples, n_features = 1000, 5
         rng = np.random.RandomState(0)
@@ -1062,13 +1057,13 @@ def test_loss_function_epsilon(self):


 class SparseSGDRegressorTestCase(DenseSGDRegressorTestCase):
-    """Run exactly the same tests using the sparse representation variant"""
+    # Run exactly the same tests using the sparse representation variant
     factory_class = SparseSGDRegressor


 def test_l1_ratio():
-    """Test if l1 ratio extremes match L1 and L2 penalty settings. """
+    # Test if l1 ratio extremes match L1 and L2 penalty settings.
    X, y = datasets.make_classification(n_samples=1000, n_features=100,
                                        n_informative=20, random_state=1234)
diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
index cafcc3639d9cc..288831bf7411f 100644
--- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -15,7 +15,7 @@


 def test_sparse_coef():
-    """ Check that the sparse_coef propery works """
+    # Check that the sparse_coef property works
     clf = ElasticNet()
     clf.coef_ = [1, 2, 3]

@@ -24,7 +24,7 @@ def test_sparse_coef():


 def test_normalize_option():
-    """ Check that the normalize option in enet works """
+    # Check that the normalize option in enet works
     X = sp.csc_matrix([[-1], [0], [1]])
     y = [-1, 0, 1]
     clf_dense = ElasticNet(fit_intercept=True, normalize=True)
@@ -37,7 +37,7 @@ def test_normalize_option():


 def test_lasso_zero():
-    """Check that the sparse lasso can handle zero data without crashing"""
+    # Check that the sparse lasso can handle zero data without crashing
     X = sp.csc_matrix((3, 1))
     y = [0, 0, 0]
     T = np.array([[1], [2], [3]])
@@ -49,7 +49,7 @@ def test_lasso_zero():


 def test_enet_toy_list_input():
-    """Test ElasticNet for various values of alpha and l1_ratio with list X"""
+    # Test ElasticNet for various values of alpha and l1_ratio with list X

     X = np.array([[-1], [0], [1]])
     X = sp.csc_matrix(X)
@@ -82,8 +82,7 @@ def test_enet_toy_list_input():


 def test_enet_toy_explicit_sparse_input():
-    """Test ElasticNet for various values of alpha and l1_ratio with sparse
-    X"""
+    # Test ElasticNet for various values of alpha and l1_ratio with sparse X
     f = ignore_warnings
     # training samples
     X = sp.lil_matrix((3, 1))
diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py
index 6313def4cf826..846445632cfb0 100644
--- a/sklearn/manifold/tests/test_spectral_embedding.py
+++ b/sklearn/manifold/tests/test_spectral_embedding.py
@@ -45,7 +45,7 @@ def _check_with_col_sign_flipping(A, B, tol=0.0):


 def test_spectral_embedding_two_components(seed=36):
-    """Test spectral embedding with two components"""
+    # Test spectral embedding with two components
     random_state = np.random.RandomState(seed)
     n_sample = 100
     affinity = np.zeros(shape=[n_sample * 2,
@@ -77,7 +77,7 @@ def test_spectral_embedding_two_components(seed=36):


 def test_spectral_embedding_precomputed_affinity(seed=36):
-    """Test spectral embedding with precomputed kernel"""
+    # Test spectral embedding with precomputed kernel
     gamma = 1.0
     se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed",
                                    random_state=np.random.RandomState(seed))
@@ -92,7 +92,7 @@ def test_spectral_embedding_precomputed_affinity(seed=36):


 def test_spectral_embedding_callable_affinity(seed=36):
-    """Test spectral embedding with callable affinity"""
+    # Test spectral embedding with callable affinity
     gamma = 0.9
     kern = rbf_kernel(S, gamma=gamma)
     se_callable = SpectralEmbedding(n_components=2,
@@ -113,7 +113,7 @@ def test_spectral_embedding_callable_affinity(seed=36):


 def test_spectral_embedding_amg_solver(seed=36):
-    """Test spectral embedding with amg solver"""
+    # Test spectral embedding with amg solver
     try:
         from pyamg import smoothed_aggregation_solver
     except ImportError:
@@ -131,7 +131,7 @@ def test_spectral_embedding_amg_solver(seed=36):


 def test_pipeline_spectral_clustering(seed=36):
-    """Test using pipeline to do spectral clustering"""
+    # Test using pipeline to do spectral clustering
    random_state = np.random.RandomState(seed)
    se_rbf = SpectralEmbedding(n_components=n_clusters,
                               affinity="rbf",
@@ -150,7 +150,7 @@ def test_pipeline_spectral_clustering(seed=36):


 def test_spectral_embedding_unknown_eigensolver(seed=36):
-    """Test that SpectralClustering fails with an unknown eigensolver"""
+    # Test that SpectralEmbedding fails with an unknown eigensolver
     se = SpectralEmbedding(n_components=1, affinity="precomputed",
                            random_state=np.random.RandomState(seed),
                            eigen_solver="")
@@ -158,14 +158,14 @@ def test_spectral_embedding_unknown_eigensolver(seed=36):


 def test_spectral_embedding_unknown_affinity(seed=36):
-    """Test that SpectralClustering fails with an unknown affinity type"""
+    # Test that SpectralEmbedding fails with an unknown affinity type
     se = SpectralEmbedding(n_components=1, affinity="",
                            random_state=np.random.RandomState(seed))
     assert_raises(ValueError, se.fit, S)


 def test_connectivity(seed=36):
-    """Test that graph connectivity test works as expected"""
+    # Test that the graph connectivity test works as expected
     graph = np.array([[1, 0, 0, 0, 0],
                       [0, 1, 1, 0, 0],
                       [0, 1, 1, 1, 0],
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index ec857403a5fed..a76f177ae0da7 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -19,7 +19,7 @@


 def test_gradient_descent_stops():
-    """Test stopping conditions of gradient descent."""
+    # Test stopping conditions of gradient descent.
     class ObjectiveSmallGradient:
         def __init__(self):
             self.it = -1
@@ -97,7 +97,7 @@ def flat_function(_):


 def test_binary_search():
-    """Test if the binary search finds Gaussians with desired perplexity."""
+    # Test if the binary search finds Gaussians with desired perplexity.
     random_state = check_random_state(0)
     distances = random_state.randn(50, 2)
     distances = distances.dot(distances.T)
@@ -111,7 +111,7 @@ def test_binary_search():


 def test_gradient():
-    """Test gradient of Kullback-Leibler divergence."""
+    # Test gradient of Kullback-Leibler divergence.
     random_state = check_random_state(0)

     n_samples = 50
@@ -135,7 +135,7 @@ def test_gradient():


 def test_trustworthiness():
-    """Test trustworthiness score."""
+    # Test trustworthiness score.
     random_state = check_random_state(0)

     # Affine transformation
@@ -155,7 +155,7 @@ def test_trustworthiness():


 def test_preserve_trustworthiness_approximately():
-    """Nearest neighbors should be preserved approximately."""
+    # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
     for init in ('random', 'pca'):
@@ -167,7 +167,7 @@ def test_preserve_trustworthiness_approximately():


 def test_fit_csr_matrix():
-    """X can be a sparse matrix."""
+    # X can be a sparse matrix.
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
     X[(np.random.randint(0, 100, 50), np.random.randint(0, 2, 50))] = 0.0
@@ -180,7 +180,7 @@ def test_fit_csr_matrix():


 def test_preserve_trustworthiness_approximately_with_precomputed_distances():
-    """Nearest neighbors should be preserved approximately."""
+    # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
     D = squareform(pdist(X), "sqeuclidean")
@@ -192,41 +192,41 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances():


 def test_early_exaggeration_too_small():
-    """Early exaggeration factor must be >= 1."""
+    # Early exaggeration factor must be >= 1.
    tsne = TSNE(early_exaggeration=0.99)
    assert_raises_regexp(ValueError, "early_exaggeration .*",
                         tsne.fit_transform, np.array([[0.0]]))


 def test_too_few_iterations():
-    """Number of gradient descent iterations must be at least 200."""
+    # Number of gradient descent iterations must be at least 200.
     tsne = TSNE(n_iter=199)
     assert_raises_regexp(ValueError, "n_iter .*", tsne.fit_transform,
                          np.array([[0.0]]))


 def test_non_square_precomputed_distances():
-    """Precomputed distance matrices must be square matrices."""
+    # Precomputed distance matrices must be square matrices.
     tsne = TSNE(metric="precomputed")
     assert_raises_regexp(ValueError, ".* square distance matrix",
                          tsne.fit_transform, np.array([[0.0], [1.0]]))


 def test_init_not_available():
-    """'init' must be 'pca' or 'random'."""
+    # 'init' must be 'pca' or 'random'.
     assert_raises_regexp(ValueError,
                          "'init' must be either 'pca' or 'random'",
                          TSNE, init="not available")


 def test_distance_not_available():
-    """'metric' must be valid."""
+    # 'metric' must be valid.
     tsne = TSNE(metric="not available")
     assert_raises_regexp(ValueError, "Unknown metric not available.*",
                          tsne.fit_transform, np.array([[0.0], [1.0]]))


 def test_pca_initialization_not_compatible_with_precomputed_kernel():
-    """Precomputed distance matrices must be square matrices."""
+    # init="pca" cannot be used with metric="precomputed".
     tsne = TSNE(metric="precomputed", init="pca")
     assert_raises_regexp(ValueError, "The parameter init=\"pca\" cannot be "
                          "used with metric=\"precomputed\".",
@@ -257,7 +257,7 @@ def test_verbose():


 def test_chebyshev_metric():
-    """t-SNE should allow metrics that cannot be squared (issue #3526)."""
+    # t-SNE should allow metrics that cannot be squared (issue #3526).
     random_state = check_random_state(0)
     tsne = TSNE(metric="chebyshev")
     X = random_state.randn(5, 2)
@@ -265,7 +265,7 @@ def test_chebyshev_metric():


 def test_reduction_to_one_component():
-    """t-SNE should allow reduction to one component (issue #4154)."""
+    # t-SNE should allow reduction to one component (issue #4154).
    random_state = check_random_state(0)
    tsne = TSNE(n_components=1)
    X = random_state.randn(5, 2)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 5f70e235a9c33..30b310ca6216c 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -128,7 +128,7 @@ def test_multilabel_accuracy_score_subset_accuracy():


 def test_precision_recall_f1_score_binary():
-    """Test Precision Recall and F1 Score for binary classification task"""
+    # Test Precision Recall and F1 Score for binary classification task
     y_true, y_pred, _ = make_prediction(binary=True)

     # detailed measures for each class
@@ -161,10 +161,9 @@ def test_precision_recall_f1_score_binary():

 @ignore_warnings
 def test_precision_recall_f_binary_single_class():
-    """Test precision, recall and F1 score behave with a single positive or
-    negative class
-
-    Such a case may occur with non-stratified cross-validation"""
+    # Test precision, recall and F1 score behave with a single positive or
+    # negative class
+    # Such a case may occur with non-stratified cross-validation
     assert_equal(1., precision_score([1, 1], [1, 1]))
     assert_equal(1., recall_score([1, 1], [1, 1]))
     assert_equal(1., f1_score([1, 1], [1, 1]))
@@ -175,9 +174,8 @@ def test_precision_recall_f_binary_single_class():


 def test_average_precision_score_score_non_binary_class():
-    """Test that average_precision_score function returns an error when trying
-    to compute average_precision_score for multiclass task.
-    """
+    # Test that average_precision_score function returns an error when trying
+    # to compute average_precision_score for multiclass task.
     rng = check_random_state(404)
     y_pred = rng.rand(10)

@@ -229,7 +227,7 @@ def test_precision_recall_fscore_support_errors():


 def test_confusion_matrix_binary():
-    """Test confusion matrix - binary classification case"""
+    # Test confusion matrix - binary classification case
     y_true, y_pred, _ = make_prediction(binary=True)

     def test(y_true, y_pred):
@@ -257,7 +255,7 @@ def test_matthews_corrcoef_nan():


 def test_precision_recall_f1_score_multiclass():
-    """Test Precision Recall and F1 Score for multiclass classification task"""
+    # Test Precision Recall and F1 Score for multiclass classification task
     y_true, y_pred, _ = make_prediction(binary=False)

     # compute scores with default labels introspection
@@ -326,10 +324,8 @@ def test_precision_refcall_f1_score_multilabel_unordered_labels():


 def test_precision_recall_f1_score_multiclass_pos_label_none():
-    """Test Precision Recall and F1 Score for multiclass classification task
-
-    GH Issue #1296
-    """
+    # Test Precision Recall and F1 Score for multiclass classification task
+    # GH Issue #1296
     # initialize data
     y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
     y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])
@@ -341,7 +337,7 @@ def test_precision_recall_f1_score_multiclass_pos_label_none():


 def test_zero_precision_recall():
-    """Check that pathological cases do not bring NaNs"""
+    # Check that pathological cases do not bring NaNs

     old_error_settings = np.seterr(all='raise')

@@ -361,7 +357,7 @@ def test_zero_precision_recall():


 def test_confusion_matrix_multiclass():
-    """Test confusion matrix - multi-class case"""
+    # Test confusion matrix - multi-class case
     y_true, y_pred, _ = make_prediction(binary=False)

     def test(y_true, y_pred, string_type=False):
@@ -387,7 +383,7 @@ def test(y_true, y_pred, string_type=False):


 def test_confusion_matrix_multiclass_subset_labels():
"""Test confusion matrix - multi-class case with subset of labels""" + # Test confusion matrix - multi-class case with subset of labels y_true, y_pred, _ = make_prediction(binary=False) # compute confusion matrix with only first two labels considered @@ -403,7 +399,7 @@ def test_confusion_matrix_multiclass_subset_labels(): def test_classification_report_multiclass(): - """Test performance report""" + # Test performance report iris = datasets.load_iris() y_true, y_pred, _ = make_prediction(dataset=iris, binary=False) @@ -421,7 +417,6 @@ def test_classification_report_multiclass(): y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names) assert_equal(report, expected_report) - # print classification report with label detection expected_report = """\ precision recall f1-score support @@ -437,7 +432,7 @@ def test_classification_report_multiclass(): def test_classification_report_multiclass_with_digits(): - """Test performance report with added digits in floating point values""" + # Test performance report with added digits in floating point values iris = datasets.load_iris() y_true, y_pred, _ = make_prediction(dataset=iris, binary=False) @@ -455,7 +450,6 @@ def test_classification_report_multiclass_with_digits(): y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names, digits=5) assert_equal(report, expected_report) - # print classification report with label detection expected_report = """\ precision recall f1-score support @@ -526,8 +520,6 @@ def test_classification_report_multiclass_with_unicode_label(): else: report = classification_report(y_true, y_pred) assert_equal(report, expected_report) - - @ignore_warnings # sequence of sequences is deprecated def test_multilabel_classification_report(): n_classes = 4 @@ -537,7 +529,6 @@ def test_multilabel_classification_report(): n_samples=n_samples) _, y_pred_ll = make_ml(n_features=1, n_classes=n_classes, random_state=1, n_samples=n_samples) - expected_report = """\ precision recall f1-score support @@ -653,8 +644,7 @@ def test_multilabel_jaccard_similarity_score(): @ignore_warnings def test_precision_recall_f1_score_multilabel_1(): - """ Test precision_recall_f1_score on a crafted multilabel example - """ + # Test precision_recall_f1_score on a crafted multilabel example # First crafted example y_true_ll = [(0,), (1,), (2, 3)] y_pred_ll = [(1,), (1,), (2, 0)] @@ -730,8 +720,7 @@ def test_precision_recall_f1_score_multilabel_1(): @ignore_warnings def test_precision_recall_f1_score_multilabel_2(): - """ Test precision_recall_f1_score on a crafted multilabel example 2 - """ + # Test precision_recall_f1_score on a crafted multilabel example 2 # Second crafted example y_true_ll = [(1,), (2,), (2, 3)] y_pred_ll = [(4,), (4,), (2, 1)] @@ -1014,8 +1003,7 @@ def test_fscore_warnings(): def test_prf_average_compat(): - """Ensure warning if f1_score et al.'s average is implicit for multiclass - """ + # Ensure warning if f1_score et al.'s average is implicit for multiclass y_true = [1, 2, 3, 3] y_pred = [1, 2, 3, 1] y_true_bin = [0, 1, 1] @@ -1044,8 +1032,8 @@ def test_prf_average_compat(): @ignore_warnings # sequence of sequences is deprecated def test__check_targets(): - """Check that _check_targets correctly merges target types, squeezes - output and fails if input lengths differ.""" + # Check that _check_targets correctly merges target types, squeezes + # output and fails if input lengths differ. 
    IND = 'multilabel-indicator'
    SEQ = 'multilabel-sequences'
    MC = 'multiclass'
@@ -1274,7 +1262,7 @@ def test_log_loss():


 def test_brier_score_loss():
-    """Check brier_score_loss function"""
+    # Check brier_score_loss function
     y_true = np.array([0, 1, 1, 0, 1, 1])
     y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
     true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index d04784f305538..166bc1fbc53c2 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -346,7 +346,7 @@

 @ignore_warnings
 def test_symmetry():
-    """Test the symmetry of score and loss functions"""
+    # Test the symmetry of score and loss functions
     random_state = check_random_state(0)
     y_true = random_state.randint(0, 2, size=(20, ))
     y_pred = random_state.randint(0, 2, size=(20, ))
@@ -513,7 +513,7 @@ def test_format_invariance_with_1d_vectors():

 @ignore_warnings
 def test_invariance_string_vs_numbers_labels():
-    """Ensure that classification metrics with string labels"""
+    # Ensure that classification metrics work with string labels
     random_state = check_random_state(0)
     y1 = random_state.randint(0, 2, size=(20, ))
     y2 = random_state.randint(0, 2, size=(20, ))
@@ -586,12 +586,10 @@ def test_invariance_string_vs_numbers_labels():

 @ignore_warnings
 def check_single_sample(name):
-    """Non-regression test: scores should work with a single sample.
-
-    This is important for leave-one-out cross validation.
-    Score functions tested are those that formerly called np.squeeze,
-    which turns an array of size 1 into a 0-d array (!).
-    """
+    # Non-regression test: scores should work with a single sample.
+    # This is important for leave-one-out cross validation.
+    # Score functions tested are those that formerly called np.squeeze,
+    # which turns an array of size 1 into a 0-d array (!).
     metric = ALL_METRICS[name]

     # assert that no exception is thrown
diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index b5a375be361d2..43a8eaddb84b4 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -40,7 +40,7 @@


 def test_pairwise_distances():
-    """ Test the pairwise_distance helper function. """
+    # Test the pairwise_distances helper function.
     rng = np.random.RandomState(0)
     # Euclidean distance should be equivalent to calling the function.
     X = rng.random_sample((5, 4))
@@ -156,22 +156,20 @@ def test_pairwise_parallel():


 def test_pairwise_callable_nonstrict_metric():
-    """paired_distances should allow callable metric where metric(x, x) != 0
-
-    Knowing that the callable is a strict metric would allow the diagonal to
-    be left uncalculated and set to 0.
-    """
+    # paired_distances should allow callable metric where metric(x, x) != 0
+    # Knowing that the callable is a strict metric would allow the diagonal to
+    # be left uncalculated and set to 0.
     assert_equal(pairwise_distances([[1]], metric=lambda x, y: 5)[0, 0], 5)


 def callable_rbf_kernel(x, y, **kwds):
-    """ Callable version of pairwise.rbf_kernel. """
+    # Callable version of pairwise.rbf_kernel.
     K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds)
     return K


 def test_pairwise_kernels():
-    """ Test the pairwise_kernels helper function. """
+    # Test the pairwise_kernels helper function.

     rng = np.random.RandomState(0)
     X = rng.random_sample((5, 4))
@@ -232,7 +230,7 @@ def test_pairwise_kernels_filter_param():


 def test_paired_distances():
-    """ Test the pairwise_distance helper function.
""" + # Test the pairwise_distance helper function. rng = np.random.RandomState(0) # Euclidean distance should be equivalent to calling the function. X = rng.random_sample((5, 4)) @@ -263,7 +261,7 @@ def test_paired_distances(): def test_pairwise_distances_argmin_min(): - """ Check pairwise minimum distances computation for any metric""" + # Check pairwise minimum distances computation for any metric X = [[0], [1]] Y = [[-1], [2]] @@ -325,7 +323,7 @@ def test_pairwise_distances_argmin_min(): def test_euclidean_distances(): - """ Check the pairwise Euclidean distances computation""" + # Check the pairwise Euclidean distances computation X = [[0]] Y = [[1], [2]] D = euclidean_distances(X, Y) @@ -340,7 +338,7 @@ def test_euclidean_distances(): # Paired distances def test_paired_euclidean_distances(): - """ Check the paired Euclidean distances computation""" + # Check the paired Euclidean distances computation X = [[0], [0]] Y = [[1], [2]] D = paired_euclidean_distances(X, Y) @@ -348,7 +346,7 @@ def test_paired_euclidean_distances(): def test_paired_manhattan_distances(): - """ Check the paired manhattan distances computation""" + # Check the paired manhattan distances computation X = [[0], [0]] Y = [[1], [2]] D = paired_manhattan_distances(X, Y) @@ -411,7 +409,7 @@ def test_chi_square_kernel(): def test_kernel_symmetry(): - """ Valid kernels should be symmetric""" + # Valid kernels should be symmetric rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) for kernel in (linear_kernel, polynomial_kernel, rbf_kernel, @@ -448,7 +446,7 @@ def test_rbf_kernel(): def test_cosine_similarity(): - """ Test the cosine_similarity. """ + # Test the cosine_similarity. rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) @@ -469,7 +467,7 @@ def test_cosine_similarity(): def test_check_dense_matrices(): - """ Ensure that pairwise array check works for dense matrices.""" + # Ensure that pairwise array check works for dense matrices. # Check that if XB is None, XB is returned as reference to XA XA = np.resize(np.arange(40), (5, 8)) XA_checked, XB_checked = check_pairwise_arrays(XA, None) @@ -478,7 +476,7 @@ def test_check_dense_matrices(): def test_check_XB_returned(): - """ Ensure that if XA and XB are given correctly, they return as equal.""" + # Ensure that if XA and XB are given correctly, they return as equal. # Check that if XB is not None, it is returned equal. # Note that the second dimension of XB is the same as XA. XA = np.resize(np.arange(40), (5, 8)) @@ -494,7 +492,7 @@ def test_check_XB_returned(): def test_check_different_dimensions(): - """ Ensure an error is raised if the dimensions are different. """ + # Ensure an error is raised if the dimensions are different. XA = np.resize(np.arange(45), (5, 9)) XB = np.resize(np.arange(32), (4, 8)) assert_raises(ValueError, check_pairwise_arrays, XA, XB) @@ -504,7 +502,7 @@ def test_check_different_dimensions(): def test_check_invalid_dimensions(): - """ Ensure an error is raised on 1D input arrays. """ + # Ensure an error is raised on 1D input arrays. XA = np.arange(45) XB = np.resize(np.arange(32), (4, 8)) assert_raises(ValueError, check_pairwise_arrays, XA, XB) @@ -514,7 +512,7 @@ def test_check_invalid_dimensions(): def test_check_sparse_arrays(): - """ Ensures that checks return valid sparse matrices. """ + # Ensures that checks return valid sparse matrices. 
    rng = np.random.RandomState(0)
    XA = rng.random_sample((5, 4))
    XA_sparse = csr_matrix(XA)
@@ -536,7 +534,7 @@ def test_check_sparse_arrays():


 def tuplify(X):
-    """ Turns a numpy matrix (any n-dimensional array) into tuples."""
+    # Turns a numpy matrix (any n-dimensional array) into tuples.
     s = X.shape
     if len(s) > 1:
         # Tuplify each sub-array in the input.
@@ -547,7 +545,7 @@ def tuplify(X):


 def test_check_tuple_input():
-    """ Ensures that checks return valid tuples. """
+    # Ensures that checks return valid tuples.
     rng = np.random.RandomState(0)
     XA = rng.random_sample((5, 4))
     XA_tuples = tuplify(XA)
@@ -559,7 +557,7 @@ def test_check_tuple_input():


 def test_check_preserve_type():
-    """ Ensures that type float32 is preserved. """
+    # Ensures that type float32 is preserved.
     XA = np.resize(np.arange(40), (5, 8)).astype(np.float32)
     XB = np.resize(np.arange(40), (5, 8)).astype(np.float32)
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 40ce6f659c569..1dd4d385f93b7 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -123,7 +123,7 @@ def _average_precision(y_true, y_score):


 def test_roc_curve():
-    """Test Area under Receiver Operating Characteristic (ROC) curve"""
+    # Test Area under Receiver Operating Characteristic (ROC) curve
     y_true, _, probas_pred = make_prediction(binary=True)

     fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
@@ -149,7 +149,7 @@ def test_roc_curve_end_points():


 def test_roc_returns_consistency():
-    """Test whether the returned threshold matches up with tpr"""
+    # Test whether the returned threshold matches up with tpr
     # make small toy dataset
     y_true, _, probas_pred = make_prediction(binary=True)
     fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
@@ -168,10 +168,8 @@ def test_roc_returns_consistency():


 def test_roc_nonrepeating_thresholds():
-    """Test to ensure that we don't return spurious repeating thresholds.
-
-    Duplicated thresholds can arise due to machine precision issues.
-    """
+    # Test to ensure that we don't return spurious repeating thresholds.
+    # Duplicated thresholds can arise due to machine precision issues.
     dataset = datasets.load_digits()
     X = dataset['data']
     y = dataset['target']
@@ -193,14 +191,14 @@ def test_roc_nonrepeating_thresholds():


 def test_roc_curve_multi():
-    """roc_curve not applicable for multi-class problems"""
+    # roc_curve not applicable for multi-class problems
     y_true, _, probas_pred = make_prediction(binary=False)

     assert_raises(ValueError, roc_curve, y_true, probas_pred)


 def test_roc_curve_confidence():
-    """roc_curve for confidence scores"""
+    # roc_curve for confidence scores
     y_true, _, probas_pred = make_prediction(binary=True)

     fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5)
@@ -211,7 +209,7 @@ def test_roc_curve_confidence():


 def test_roc_curve_hard():
-    """roc_curve for hard decisions"""
+    # roc_curve for hard decisions
     y_true, pred, probas_pred = make_prediction(binary=True)

     # always predict one
@@ -350,7 +348,7 @@ def test_roc_curve_toydata():


 def test_auc():
-    """Test Area Under Curve (AUC) computation"""
+    # Test Area Under Curve (AUC) computation
     x = [0, 1]
     y = [0, 1]
     assert_array_almost_equal(auc(x, y), 0.5)
@@ -396,9 +394,8 @@ def test_auc_errors():


 def test_auc_score_non_binary_class():
-    """Test that roc_auc_score function returns an error when trying
-    to compute AUC for non-binary class values.
-    """
+    # Test that roc_auc_score function returns an error when trying
+    # to compute AUC for non-binary class values.
    rng = check_random_state(404)
    y_pred = rng.rand(10)
    # y_true contains only one class value
@@ -473,7 +470,7 @@ def test_precision_recall_curve_pos_label():


 def _test_precision_recall_curve(y_true, probas_pred):
-    """Test Precision-Recall and aread under PR curve"""
+    # Test Precision-Recall and area under the PR curve
     p, r, thresholds = precision_recall_curve(y_true, probas_pred)
     precision_recall_auc = auc(r, p)
     assert_array_almost_equal(precision_recall_auc, 0.85, 2)
@@ -618,7 +615,7 @@ def test_score_scale_invariance():


 def check_lrap_toy(lrap_score):
-    """Check on several small example that it works """
+    # Check on several small examples that it works
     assert_almost_equal(lrap_score([[0, 1]], [[0.25, 0.75]]), 1)
     assert_almost_equal(lrap_score([[0, 1]], [[0.75, 0.25]]), 1 / 2)
     assert_almost_equal(lrap_score([[1, 1]], [[0.75, 0.25]]), 1)
@@ -714,7 +711,7 @@ def check_lrap_error_raised(lrap_score):


 def check_lrap_only_ties(lrap_score):
-    """Check tie handling in score"""
+    # Check tie handling in score
     # Basic check with only ties and increasing label space
     for n_labels in range(2, 10):
         y_score = np.ones((1, n_labels))
@@ -730,7 +727,7 @@ def check_lrap_only_ties(lrap_score):


 def check_lrap_without_tie_and_increasing_score(lrap_score):
-    """ Check that Label ranking average precision works for various"""
+    # Check that Label ranking average precision works for various inputs
     # Basic check with increasing label space size and decreasing score
     for n_labels in range(2, 10):
         y_score = n_labels - (np.arange(n_labels).reshape((1, n_labels)) + 1)
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 3c97dd66e7c02..5a2523bf1dcf7 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -83,7 +83,7 @@ def __call__(self, est, X, y):


 def test_check_scoring():
-    """Test all branches of check_scoring"""
+    # Test all branches of check_scoring
     estimator = EstimatorWithoutFit()
     pattern = (r"estimator should a be an estimator implementing 'fit' method,"
                r" .* was passed")
@@ -134,14 +134,14 @@ def test_check_scoring_gridsearchcv():


 def test_make_scorer():
-    """Sanity check on the make_scorer factory function."""
+    # Sanity check on the make_scorer factory function.
     f = lambda *args: 0
     assert_raises(ValueError, make_scorer, f,
                   needs_threshold=True, needs_proba=True)


 def test_classification_scores():
-    """Test classification scorers."""
+    # Test classification scorers.
     X, y = make_blobs(random_state=0, centers=2)
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
     clf = LinearSVC(random_state=0)
@@ -185,7 +185,7 @@ def test_classification_scores():


 def test_regression_scorers():
-    """Test regression scorers."""
+    # Test regression scorers.
     diabetes = load_diabetes()
     X, y = diabetes.data, diabetes.target
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
@@ -197,7 +197,7 @@ def test_regression_scorers():


 def test_thresholded_scorers():
-    """Test scorers that take thresholds."""
+    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
@@ -227,9 +227,8 @@ def test_thresholded_scorers():


 def test_thresholded_scorers_multilabel_indicator_data():
-    """Test that the scorer work with multilabel-indicator format
-    for multilabel and multi-output multi-class classifier
-    """
+    # Test that the scorers work with the multilabel-indicator format
+    # for multilabel and multi-output multi-class classifiers
     X, y = make_multilabel_classification(return_indicator=True,
                                           allow_unlabeled=False,
                                           random_state=0)
@@ -272,7 +271,7 @@ def test_thresholded_scorers_multilabel_indicator_data():


 def test_unsupervised_scorers():
-    """Test clustering scorers against gold standard labeling."""
+    # Test clustering scorers against gold standard labeling.
     # We don't have any real unsupervised Scorers yet.
     X, y = make_blobs(random_state=0, centers=2)
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
@@ -285,7 +284,7 @@ def test_unsupervised_scorers():

 @ignore_warnings
 def test_raises_on_score_list():
-    """Test that when a list of scores is returned, we raise proper errors."""
+    # Test that when a list of scores is returned, we raise proper errors.
     X, y = make_blobs(random_state=0)
     f1_scorer_no_average = make_scorer(f1_score, average=None)
     clf = DecisionTreeClassifier()
@@ -298,7 +297,7 @@ def test_raises_on_score_list():

 @ignore_warnings
 def test_scorer_sample_weight():
-    """Test that scorers support sample_weight or raise sensible errors"""
+    # Test that scorers support sample_weight or raise sensible errors

     # Unlike the metrics invariance test, in the scorer case it's harder
     # to ensure that, on the classifier output, weighted and unweighted
diff --git a/sklearn/mixture/tests/test_gmm.py b/sklearn/mixture/tests/test_gmm.py
index 984218c4fae24..4f41e174aedb6 100644
--- a/sklearn/mixture/tests/test_gmm.py
+++ b/sklearn/mixture/tests/test_gmm.py
@@ -13,10 +13,8 @@


 def test_sample_gaussian():
-    """
-    Test sample generation from mixture.sample_gaussian where covariance
-    is diagonal, spherical and full
-    """
+    # Test sample generation from mixture.sample_gaussian where covariance
+    # is diagonal, spherical and full

     n_features, n_samples = 2, 300
     axis = 1
@@ -65,11 +63,9 @@ def _naive_lmvnpdf_diag(X, mu, cv):


 def test_lmvnpdf_diag():
-    """
-    test a slow and naive implementation of lmvnpdf and
-    compare it to the vectorized version (mixture.lmvnpdf) to test
-    for correctness
-    """
+    # Test a slow and naive implementation of lmvnpdf and
+    # compare it to the vectorized version (mixture.lmvnpdf) to test
+    # for correctness
     n_features, n_components, n_samples = 2, 3, 10
     mu = rng.randint(10) * rng.rand(n_components, n_features)
     cv = (rng.rand(n_components, n_features) + 1.0) ** 2
@@ -237,8 +233,7 @@ def test_train(self, params='wmc'):
                         % (delta_min, self.threshold, self.covariance_type, trainll))

     def test_train_degenerate(self, params='wmc'):
-        """ Train on degenerate data with 0 in some dimensions
-        """
+        # Train on degenerate data with 0 in some dimensions
         # Create a training set by sampling from the predefined distribution.
         X = rng.randn(100, self.n_features)
         X.T[1:] = 0
@@ -250,8 +245,7 @@ def test_train_degenerate(self, params='wmc'):
         self.assertTrue(np.sum(np.abs(trainll / 100 / X.shape[1])) < 5)

     def test_train_1d(self, params='wmc'):
-        """ Train on 1-D data
-        """
+        # Train on 1-D data
         # Create a training set by sampling from the predefined distribution.
        X = rng.randn(100, 1)
        #X.T[1:] = 0
@@ -294,7 +288,7 @@ class TestGMMWithFullCovars(unittest.TestCase, GMMTester):


 def test_multiple_init():
-    """Test that multiple inits does not much worse than a single one"""
+    # Test that multiple inits do not perform much worse than a single one
     X = rng.randn(30, 5)
     X[:10] += 2
     g = mixture.GMM(n_components=2, covariance_type='spherical',
@@ -306,7 +300,7 @@ def test_multiple_init():


 def test_n_parameters():
-    """Test that the right number of parameters is estimated"""
+    # Test that the right number of parameters is estimated
     n_samples, n_dim, n_components = 7, 5, 2
     X = rng.randn(n_samples, n_dim)
     n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41}
@@ -318,10 +312,8 @@ def test_n_parameters():


 def test_1d_1component():
-    """
-    Test all of the covariance_types return the same BIC score for
-    1-dimensional, 1 component fits.
-    """
+    # Test that all of the covariance_types return the same BIC score for
+    # 1-dimensional, 1-component fits.
     n_samples, n_dim, n_components = 100, 1, 1
     X = rng.randn(n_samples, n_dim)
     g_full = mixture.GMM(n_components=n_components, covariance_type='full',
@@ -336,7 +328,7 @@ def test_1d_1component():


 def test_aic():
-    """ Test the aic and bic criteria"""
+    # Test the aic and bic criteria
     n_samples, n_dim, n_components = 50, 3, 2
     X = rng.randn(n_samples, n_dim)
     SGH = 0.5 * (X.var() + np.log(2 * np.pi))  # standard gaussian entropy
diff --git a/sklearn/neighbors/tests/test_approximate.py b/sklearn/neighbors/tests/test_approximate.py
index 8612e5bef5c62..50794e263c831 100644
--- a/sklearn/neighbors/tests/test_approximate.py
+++ b/sklearn/neighbors/tests/test_approximate.py
@@ -27,7 +27,7 @@


 def test_neighbors_accuracy_with_n_candidates():
-    """Checks whether accuracy increases as `n_candidates` increases."""
+    # Checks whether accuracy increases as `n_candidates` increases.
     n_candidates_values = np.array([.1, 50, 500])
     n_samples = 100
     n_features = 10
@@ -61,7 +61,7 @@ def test_neighbors_accuracy_with_n_candidates():


 def test_neighbors_accuracy_with_n_estimators():
-    """Checks whether accuracy increases as `n_estimators` increases."""
+    # Checks whether accuracy increases as `n_estimators` increases.
     n_estimators = np.array([1, 10, 100])
     n_samples = 100
     n_features = 10
@@ -96,12 +96,10 @@ def test_neighbors_accuracy_with_n_estimators():

 @ignore_warnings
 def test_kneighbors():
-    """Checks whether desired number of neighbors are returned.
-
-    It is guaranteed to return the requested number of neighbors
-    if `min_hash_match` is set to 0. Returned distances should be
-    in ascending order.
-    """
+    # Checks whether the desired number of neighbors is returned.
+    # It is guaranteed to return the requested number of neighbors
+    # if `min_hash_match` is set to 0. Returned distances should be
+    # in ascending order.
     n_samples = 12
     n_features = 2
     n_iter = 10
@@ -147,14 +145,12 @@ def test_kneighbors():


 def test_radius_neighbors():
-    """Checks whether Returned distances are less than `radius`
-
-    At least one point should be returned when the `radius` is set
-    to mean distance from the considering point to other points in
-    the database.
-    Moreover, this test compares the radius neighbors of LSHForest
-    with the `sklearn.neighbors.NearestNeighbors`.
-    """
+    # Checks whether returned distances are less than `radius`
+    # At least one point should be returned when the `radius` is set
+    # to the mean distance from the query point to other points in
+    # the database.
+    # Moreover, this test compares the radius neighbors of LSHForest
+    # with those of `sklearn.neighbors.NearestNeighbors`.
     n_samples = 12
     n_features = 2
     n_iter = 10
@@ -281,7 +277,7 @@ def test_radius_neighbors_boundary_handling():


 def test_distances():
-    """Checks whether returned neighbors are from closest to farthest."""
+    # Checks whether returned neighbors are from closest to farthest.
     n_samples = 12
     n_features = 2
     n_iter = 10
@@ -307,7 +303,7 @@ def test_distances():


 def test_fit():
-    """Checks whether `fit` method sets all attribute values correctly."""
+    # Checks whether `fit` method sets all attribute values correctly.
     n_samples = 12
     n_features = 2
     n_estimators = 5
@@ -334,10 +330,8 @@ def test_fit():


 def test_partial_fit():
-    """Checks whether inserting array is consitent with fitted data.
-
-    `partial_fit` method should set all attribute values correctly.
-    """
+    # Checks whether inserting an array is consistent with fitted data.
+    # `partial_fit` method should set all attribute values correctly.
     n_samples = 12
     n_samples_partial_fit = 3
     n_features = 2
@@ -371,13 +365,11 @@ def test_partial_fit():


 def test_hash_functions():
-    """Checks randomness of hash functions.
-
-    Variance and mean of each hash function (projection vector)
-    should be different from flattened array of hash functions.
-    If hash functions are not randomly built (seeded with
-    same value), variances and means of all functions are equal.
-    """
+    # Checks randomness of hash functions.
+    # Variance and mean of each hash function (projection vector)
+    # should be different from flattened array of hash functions.
+    # If hash functions are not randomly built (seeded with
+    # same value), variances and means of all functions are equal.
     n_samples = 12
     n_features = 2
     n_estimators = 5
@@ -402,12 +394,10 @@ def test_hash_functions():


 def test_candidates():
-    """Checks whether candidates are sufficient.
-
-    This should handle the cases when number of candidates is 0.
-    User should be warned when number of candidates is less than
-    requested number of neighbors.
-    """
+    # Checks whether candidates are sufficient.
+    # This should handle the cases when number of candidates is 0.
+    # User should be warned when number of candidates is less than
+    # requested number of neighbors.
     X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1],
                         [6, 10, 2]], dtype=np.float32)
     X_test = np.array([7, 10, 3], dtype=np.float32)
@@ -442,7 +432,7 @@ def test_candidates():


 def test_graphs():
-    """Smoke tests for graph methods."""
+    # Smoke tests for graph methods.
    n_samples_sizes = [5, 10, 20]
    n_features = 3
    rng = np.random.RandomState(42)
diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py
index 0c248fdadb7ce..7a26af1833665 100644
--- a/sklearn/neighbors/tests/test_ball_tree.py
+++ b/sklearn/neighbors/tests/test_ball_tree.py
@@ -177,7 +177,7 @@ def check_results(kernel, h, atol, rtol, breadth_first):


 def test_gaussian_kde(n_samples=1000):
-    """Compare gaussian KDE results to scipy.stats.gaussian_kde"""
+    # Compare gaussian KDE results to scipy.stats.gaussian_kde
     from scipy.stats import gaussian_kde
     np.random.seed(0)
     x_in = np.random.normal(0, 1, n_samples)
diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py
index c949f1035216a..8bccc46087996 100644
--- a/sklearn/neighbors/tests/test_kd_tree.py
+++ b/sklearn/neighbors/tests/test_kd_tree.py
@@ -133,7 +133,7 @@ def check_results(kernel, h, atol, rtol, breadth_first):


 def test_gaussian_kde(n_samples=1000):
-    """Compare gaussian KDE results to scipy.stats.gaussian_kde"""
+    # Compare gaussian KDE results to scipy.stats.gaussian_kde
     from scipy.stats import gaussian_kde
     np.random.seed(0)
     x_in = np.random.normal(0, 1, n_samples)
diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py
index 132ae3171d06c..c1c63288d6473 100644
--- a/sklearn/neighbors/tests/test_kde.py
+++ b/sklearn/neighbors/tests/test_kde.py
@@ -90,7 +90,7 @@ def test_kernel_density_sampling(n_samples=100, n_features=3):


 def test_kde_algorithm_metric_choice():
-    """Smoke test for various metrics and algorithms"""
+    # Smoke test for various metrics and algorithms
     rng = np.random.RandomState(0)
     X = rng.randn(10, 2)    # 2 features required for haversine dist.
     Y = rng.randn(10, 2)
diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py
index 9922434c3b869..8ecb226edef17 100644
--- a/sklearn/neighbors/tests/test_nearest_centroid.py
+++ b/sklearn/neighbors/tests/test_nearest_centroid.py
@@ -29,7 +29,7 @@


 def test_classification_toy():
-    """Check classification on a toy dataset, including sparse versions."""
+    # Check classification on a toy dataset, including sparse versions.
     clf = NearestCentroid()
     clf.fit(X, y)
     assert_array_equal(clf.predict(T), true_result)
@@ -63,7 +63,7 @@ def test_precomputed():


 def test_iris():
-    """Check consistency on dataset iris."""
+    # Check consistency on dataset iris.
     for metric in ('euclidean', 'cosine'):
         clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
         score = np.mean(clf.predict(iris.data) == iris.target)
@@ -71,7 +71,7 @@ def test_iris():


 def test_iris_shrinkage():
-    """Check consistency on dataset iris, when using shrinkage."""
+    # Check consistency on dataset iris, when using shrinkage.
     for metric in ('euclidean', 'cosine'):
         for shrink_threshold in [None, 0.1, 0.5]:
             clf = NearestCentroid(metric=metric,
@@ -109,7 +109,7 @@ def test_shrinkage_threshold_decoded_y():


 def test_predict_translated_data():
-    """Test that NearestCentroid gives same results on translated data"""
+    # Test that NearestCentroid gives same results on translated data
     rng = np.random.RandomState(0)
     X = rng.rand(50, 50)

@@ -126,7 +126,7 @@ def test_predict_translated_data():


 def test_manhattan_metric():
-    """Test the manhattan metric."""
+    # Test the manhattan metric.
    clf = NearestCentroid(metric='manhattan')

    clf.fit(X, y)
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 270b470a6d3fa..494f3fe11f14d 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -56,7 +56,7 @@ def _weight_func(dist):

 def test_unsupervised_kneighbors(n_samples=20, n_features=5,
                                  n_query_pts=2, n_neighbors=5):
-    """Test unsupervised neighbors methods"""
+    # Test unsupervised neighbors methods
     X = rng.rand(n_samples, n_features)

     test = rng.rand(n_query_pts, n_features)
@@ -82,7 +82,7 @@ def test_unsupervised_kneighbors(n_samples=20, n_features=5,


 def test_unsupervised_inputs():
-    """test the types of valid input into NearestNeighbors"""
+    # Test the types of valid input into NearestNeighbors
     X = rng.random_sample((10, 3))

     nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
@@ -103,7 +103,7 @@ def test_unsupervised_inputs():

 def test_unsupervised_radius_neighbors(n_samples=20, n_features=5,
                                        n_query_pts=2, radius=0.5,
                                        random_state=0):
-    """Test unsupervised radius-based query"""
+    # Test unsupervised radius-based query
     rng = np.random.RandomState(random_state)

     X = rng.rand(n_samples, n_features)
@@ -146,7 +146,7 @@ def test_kneighbors_classifier(n_samples=40,
                                n_test_pts=10,
                                n_neighbors=5,
                                random_state=0):
-    """Test k-neighbors classification"""
+    # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
@@ -172,7 +172,7 @@ def test_kneighbors_classifier(n_samples=40,

 def test_kneighbors_classifier_float_labels(n_samples=40, n_features=5,
                                             n_test_pts=10, n_neighbors=5,
                                             random_state=0):
-    """Test k-neighbors classification"""
+    # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
@@ -185,7 +185,7 @@ def test_kneighbors_classifier_float_labels(n_samples=40, n_features=5,


 def test_kneighbors_classifier_predict_proba():
-    """Test KNeighborsClassifier.predict_proba() method"""
+    # Test KNeighborsClassifier.predict_proba() method
     X = np.array([[0, 2, 0],
                   [0, 2, 1],
                   [2, 0, 0],
@@ -221,7 +221,7 @@ def test_radius_neighbors_classifier(n_samples=40,
                                      n_test_pts=10,
                                      radius=0.5,
                                      random_state=0):
-    """Test radius-based classification"""
+    # Test radius-based classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
@@ -244,8 +244,8 @@ def test_radius_neighbors_classifier(n_samples=40,


 def test_radius_neighbors_classifier_when_no_neighbors():
-    """ Test radius-based classifier when no neighbors found.
-    In this case it should rise an informative exception """
+    # Test radius-based classifier when no neighbors found.
+    # In this case it should raise an informative exception
     X = np.array([[1.0, 1.0], [2.0, 2.0]])
     y = np.array([1, 2])
@@ -273,8 +273,8 @@ def test_radius_neighbors_classifier_when_no_neighbors():


 def test_radius_neighbors_classifier_outlier_labeling():
-    """ Test radius-based classifier when no neighbors found and outliers
-    are labeled. """
+    # Test radius-based classifier when no neighbors found and outliers
+    # are labeled.
    X = np.array([[1.0, 1.0], [2.0, 2.0]])
    y = np.array([1, 2])
@@ -299,7 +299,7 @@ def test_radius_neighbors_classifier_outlier_labeling():


 def test_radius_neighbors_classifier_zero_distance():
-    """ Test radius-based classifier, when distance to a sample is zero. """
+    # Test radius-based classifier, when distance to a sample is zero.
     X = np.array([[1.0, 1.0], [2.0, 2.0]])
     y = np.array([1, 2])
@@ -320,7 +320,7 @@ def test_radius_neighbors_classifier_zero_distance():


 def test_neighbors_regressors_zero_distance():
-    """ Test radius-based regressor, when distance to a sample is zero. """
+    # Test radius-based regressor, when distance to a sample is zero.
     X = np.array([[1.0, 1.0], [1.0, 1.0], [2.0, 2.0], [2.5, 2.5]])
     y = np.array([1.0, 1.5, 2.0, 0.0])
@@ -371,7 +371,7 @@ def test_radius_neighbors_boundary_handling():


 def test_RadiusNeighborsClassifier_multioutput():
-    """Test k-NN classifier on multioutput data"""
+    # Test radius-based classifier on multioutput data
     rng = check_random_state(0)
     n_features = 2
     n_samples = 40
@@ -411,7 +411,7 @@ def test_kneighbors_classifier_sparse(n_samples=40,
                                       n_test_pts=10,
                                       n_neighbors=5,
                                       random_state=0):
-    """Test k-NN classifier on sparse matrices"""
+    # Test k-NN classifier on sparse matrices
     # Like the above, but with various types of sparse matrices
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
@@ -430,7 +430,7 @@ def test_kneighbors_classifier_sparse(n_samples=40,


 def test_KNeighborsClassifier_multioutput():
-    """Test k-NN classifier on multioutput data"""
+    # Test k-NN classifier on multioutput data
     rng = check_random_state(0)
     n_features = 5
     n_samples = 50
@@ -480,7 +480,7 @@ def test_kneighbors_regressor(n_samples=40,
                               n_test_pts=10,
                               n_neighbors=3,
                               random_state=0):
-    """Test k-neighbors regression"""
+    # Test k-neighbors regression
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = np.sqrt((X ** 2).sum(1))
@@ -502,7 +502,7 @@ def test_kneighbors_regressor(n_samples=40,


 def test_KNeighborsRegressor_multioutput_uniform_weight():
-    """Test k-neighbors in multi-output regression with uniform weight"""
+    # Test k-neighbors in multi-output regression with uniform weight
     rng = check_random_state(0)
     n_features = 5
     n_samples = 40
@@ -533,7 +533,7 @@ def test_kneighbors_regressor_multioutput(n_samples=40,
                                          n_test_pts=10,
                                          n_neighbors=3,
                                          random_state=0):
-    """Test k-neighbors in multi-output regression"""
+    # Test k-neighbors in multi-output regression
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = np.sqrt((X ** 2).sum(1))
@@ -560,7 +560,7 @@ def test_radius_neighbors_regressor(n_samples=40,
                                     n_test_pts=10,
                                     radius=0.5,
                                     random_state=0):
-    """Test radius-based neighbors regression"""
+    # Test radius-based neighbors regression
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = np.sqrt((X ** 2).sum(1))
@@ -582,7 +582,7 @@ def test_radius_neighbors_regressor(n_samples=40,


 def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight():
-    """Test radius neighbors in multi-output regression (uniform weight)"""
+    # Test radius neighbors in multi-output regression (uniform weight)
     rng = check_random_state(0)

     n_features = 5
@@ -616,7 +616,7 @@ def test_RadiusNeighborsRegressor_multioutput(n_samples=40,
                                               n_test_pts=10,
                                               n_neighbors=3,
                                               random_state=0):
-    """Test k-neighbors in multi-output regression with various weight"""
+    # Test radius-based neighbors in multi-output regression with various weights
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
@@ -643,7 +643,7 @@ def test_kneighbors_regressor_sparse(n_samples=40,
                                      n_test_pts=10,
                                      n_neighbors=5,
                                      random_state=0):
-    """Test radius-based regression on sparse matrices"""
+    # Test k-neighbors regression on sparse matrices
     # Like the above, but with various types of sparse matrices
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
@@ -659,11 +659,9 @@ def test_kneighbors_regressor_sparse(n_samples=40,


 def test_neighbors_iris():
-    """Sanity checks on the iris dataset
-
-    Puts three points of each label in the plane and performs a
-    nearest neighbor query on points near the decision boundary.
-    """
+    # Sanity checks on the iris dataset
+    # Puts three points of each label in the plane and performs a
+    # nearest neighbor query on points near the decision boundary.

     for algorithm in ALGORITHMS:
         clf = neighbors.KNeighborsClassifier(n_neighbors=1,
@@ -682,11 +680,9 @@ def test_neighbors_iris():


 def test_neighbors_digits():
-    """Sanity check on the digits dataset
-
-    the 'brute' algorithm has been observed to fail if the input
-    dtype is uint8 due to overflow in distance calculations.
-    """
+    # Sanity check on the digits dataset
+    # The 'brute' algorithm has been observed to fail if the input
+    # dtype is uint8 due to overflow in distance calculations.
     X = digits.data.astype('uint8')
     Y = digits.target
@@ -704,7 +700,7 @@ def test_neighbors_digits():


 def test_kneighbors_graph():
-    """Test kneighbors_graph to build the k-Nearest Neighbor graph."""
+    # Test kneighbors_graph to build the k-Nearest Neighbor graph.
     X = np.array([[0, 1], [1.01, 1.], [2, 0]])

     # n_neighbors = 1
@@ -741,8 +737,8 @@ def test_kneighbors_graph():


 def test_kneighbors_graph_sparse(seed=36):
-    """Test kneighbors_graph to build the k-Nearest Neighbor graph
-    for sparse input."""
+    # Test kneighbors_graph to build the k-Nearest Neighbor graph
+    # for sparse input.
     rng = np.random.RandomState(seed)
     X = rng.randn(10, 10)
     Xcsr = csr_matrix(X)
@@ -759,7 +755,7 @@ def test_kneighbors_graph_sparse(seed=36):


 def test_radius_neighbors_graph():
-    """Test radius_neighbors_graph to build the Nearest Neighbor graph."""
+    # Test radius_neighbors_graph to build the Nearest Neighbor graph.
     X = np.array([[0, 1], [1.01, 1.], [2, 0]])

     A = neighbors.radius_neighbors_graph(X, 1.5, mode='connectivity')
@@ -778,8 +774,8 @@ def test_radius_neighbors_graph():


 def test_radius_neighbors_graph_sparse(seed=36):
-    """Test radius_neighbors_graph to build the Nearest Neighbor graph
-    for sparse input."""
+    # Test radius_neighbors_graph to build the Nearest Neighbor graph
+    # for sparse input.
     rng = np.random.RandomState(seed)
     X = rng.randn(10, 10)
     Xcsr = csr_matrix(X)
@@ -796,7 +792,7 @@ def test_radius_neighbors_graph_sparse(seed=36):
 
 
 def test_neighbors_badargs():
-    """Test bad argument values: these should all raise ValueErrors"""
+    # Test bad argument values: these should all raise ValueErrors
     assert_raises(ValueError,
                   neighbors.NearestNeighbors,
                   algorithm='blah')
@@ -847,7 +843,7 @@ def test_neighbors_badargs():
 
 def test_neighbors_metrics(n_samples=20, n_features=3,
                            n_query_pts=2, n_neighbors=5):
-    """Test computing the neighbors for various metrics"""
+    # Test computing the neighbors for various metrics
     # create a symmetric matrix
     V = rng.rand(n_features, n_features)
     VI = np.dot(V, V.T)
@@ -966,7 +962,7 @@ def check_object_arrays(nparray, list_check):
 
 
 def test_k_and_radius_neighbors_train_is_not_query():
-    """Test kneighbors et.al when query is not training data"""
+    # Test kneighbors et al. when query is not training data
 
     for algorithm in ALGORITHMS:
 
@@ -995,7 +991,7 @@ def test_k_and_radius_neighbors_train_is_not_query():
 
 
 def test_k_and_radius_neighbors_X_None():
-    """Test kneighbors et.al when query is None"""
+    # Test kneighbors et al. when query is None
     for algorithm in ALGORITHMS:
 
         nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)
@@ -1027,7 +1023,7 @@ def test_k_and_radius_neighbors_X_None():
 
 
 def test_k_and_radius_neighbors_duplicates():
-    """Test behavior of kneighbors when duplicates are present in query"""
+    # Test behavior of kneighbors when duplicates are present in query
     for algorithm in ALGORITHMS:
         nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)
@@ -1074,7 +1070,7 @@ def test_k_and_radius_neighbors_duplicates():
 
 
 def test_include_self_neighbors_graph():
-    """Test include_self parameter in neighbors_graph"""
+    # Test include_self parameter in neighbors_graph
     X = [[2, 3], [4, 5]]
     kng = neighbors.kneighbors_graph(X, 1, include_self=True).A
     kng_not_self = neighbors.kneighbors_graph(X, 1, include_self=False).A
diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py
index 6041e246f1dcd..a735954fcb5be 100644
--- a/sklearn/neural_network/tests/test_rbm.py
+++ b/sklearn/neural_network/tests/test_rbm.py
@@ -59,7 +59,7 @@ def test_transform():
 
 
 def test_small_sparse():
-    """BernoulliRBM should work on small sparse matrices."""
+    # BernoulliRBM should work on small sparse matrices.
     X = csr_matrix(Xdigits[:4])
     BernoulliRBM().fit(X)  # no exception
@@ -96,10 +96,8 @@ def test_sample_hiddens():
 
 
 def test_fit_gibbs():
-    """
-    Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
-    from the same input
-    """
+    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
+    # from the same input
     rng = np.random.RandomState(42)
     X = np.array([[0.], [1.]])
     rbm1 = BernoulliRBM(n_components=2, batch_size=2,
@@ -113,10 +111,8 @@ def test_fit_gibbs():
 
 
 def test_fit_gibbs_sparse():
-    """
-    Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
-    the same input even when the input is sparse, and test against non-sparse
-    """
+    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
+    # the same input even when the input is sparse, and test against non-sparse
     rbm1 = test_fit_gibbs()
     rng = np.random.RandomState(42)
     from scipy.sparse import csc_matrix
@@ -131,8 +127,8 @@ def test_fit_gibbs_sparse():
 
 
 def test_gibbs_smoke():
-    """Check if we don't get NaNs sampling the full digits dataset.
-    Also check that sampling again will yield different results."""
+    # Check if we don't get NaNs sampling the full digits dataset.
+    # Also check that sampling again will yield different results.
     X = Xdigits
     rbm1 = BernoulliRBM(n_components=42, batch_size=40,
                         n_iter=20, random_state=42)
@@ -144,7 +140,7 @@ def test_gibbs_smoke():
 
 
 def test_score_samples():
-    """Test score_samples (pseudo-likelihood) method."""
+    # Test score_samples (pseudo-likelihood) method.
     # Assert that pseudo-likelihood is computed without clipping.
     # See Fabian's blog, http://bit.ly/1iYefRk
     rng = np.random.RandomState(42)
@@ -179,9 +175,7 @@ def test_rbm_verbose():
 
 
 def test_sparse_and_verbose():
-    """
-    Make sure RBM works with sparse input when verbose=True
-    """
+    # Make sure RBM works with sparse input when verbose=True
     old_stdout = sys.stdout
     sys.stdout = StringIO()
     from scipy.sparse import csc_matrix
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index d32fddb5071c5..8fd59edea8c88 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -40,7 +40,7 @@ def toarray(a):
 
 
 def test_polynomial_features():
-    """Test Polynomial Features"""
+    # Test Polynomial Features
     X1 = np.arange(6)[:, np.newaxis]
     P1 = np.hstack([np.ones_like(X1),
                     X1, X1 ** 2, X1 ** 3])
@@ -70,7 +70,7 @@ def test_polynomial_features():
 
 
 def test_scaler_1d():
-    """Test scaling of dataset along single axis"""
+    # Test scaling of dataset along single axis
     rng = np.random.RandomState(0)
     X = rng.randn(5)
     X_orig_copy = X.copy()
@@ -100,7 +100,7 @@ def test_scaler_1d():
 
 
 def test_scaler_2d_arrays():
-    """Test scaling of 2d array along first axis"""
+    # Test scaling of 2d array along first axis
     rng = np.random.RandomState(0)
     X = rng.randn(4, 5)
     X[:, 0] = 0.0  # first feature is always of zero
@@ -181,7 +181,7 @@ def test_min_max_scaler_iris():
 
 
 def test_min_max_scaler_zero_variance_features():
-    """Check min max scaler on toy data with zero variance features"""
+    # Check min max scaler on toy data with zero variance features
     X = [[0., 1., +0.5],
         [0., 1., -0.1],
         [0., 1., +1.1]]
@@ -216,7 +216,7 @@ def test_min_max_scaler_zero_variance_features():
 
 
 def test_min_max_scaler_1d():
-    """Test scaling of dataset along single axis"""
+    # Test scaling of dataset along single axis
    rng = np.random.RandomState(0)
    X = rng.randn(5)
    X_orig_copy = X.copy()
@@ -378,7 +378,7 @@ def test_scaler_int():
 
 
 def test_scaler_without_copy():
-    """Check that StandardScaler.fit does not change input"""
+    # Check that StandardScaler.fit does not change input
     rng = np.random.RandomState(42)
     X = rng.randn(4, 5)
     X[:, 0] = 0.0  # first feature is always of zero
@@ -411,7 +411,7 @@ def test_scale_sparse_with_mean_raise_exception():
 
 
 def test_scale_input_finiteness_validation():
-    """Check if non finite inputs raise ValueError"""
+    # Check if non finite inputs raise ValueError
     X = [np.nan, 5, 6, 7, 8]
     assert_raises_regex(ValueError,
                         "Input contains NaN, infinity or a value too large",
@@ -454,7 +454,7 @@ def test_scale_function_without_centering():
 
 
 def test_warning_scaling_integers():
-    """Check warning when scaling integer data"""
+    # Check warning when scaling integer data
     X = np.array([[1, 2, 0],
                   [0, 0, 0]], dtype=np.uint8)
@@ -564,7 +564,7 @@ def test_normalizer_l2():
 
 
 def test_normalize():
-    """Test normalize function"""
+    # Test normalize function
     # Only tests functionality not used by the tests for Normalizer.
     X = np.random.RandomState(37).randn(3, 2)
     assert_array_equal(normalize(X, copy=False),
@@ -622,8 +622,8 @@ def test_binarizer():
 
 
 def test_center_kernel():
-    """Test that KernelCenterer is equivalent to StandardScaler
-    in feature space"""
+    # Test that KernelCenterer is equivalent to StandardScaler
+    # in feature space
     rng = np.random.RandomState(0)
     X_fit = rng.random_sample((5, 4))
     scaler = StandardScaler(with_std=False)
@@ -683,7 +683,7 @@ def test_add_dummy_feature_csr():
 
 
 def test_one_hot_encoder_sparse():
-    """Test OneHotEncoder's fit and transform."""
+    # Test OneHotEncoder's fit and transform.
     X = [[3, 2, 1], [0, 1, 1]]
     enc = OneHotEncoder()
     # discover max values automatically
@@ -736,7 +736,7 @@ def test_one_hot_encoder_sparse():
     assert_raises(ValueError, enc.transform, [[0], [-1]])
 
 def test_one_hot_encoder_dense():
-    """check for sparse=False"""
+    # check for sparse=False
     X = [[3, 2, 1], [0, 1, 1]]
     enc = OneHotEncoder(sparse=False)
     # discover max values automatically
diff --git a/sklearn/preprocessing/tests/test_imputation.py b/sklearn/preprocessing/tests/test_imputation.py
index 608fdacee6e12..bfcfc2a753b6a 100644
--- a/sklearn/preprocessing/tests/test_imputation.py
+++ b/sklearn/preprocessing/tests/test_imputation.py
@@ -75,7 +75,7 @@ def _check_statistics(X, X_true,
 
 
 def test_imputation_shape():
-    """Verify the shapes of the imputed matrix for different strategies."""
+    # Verify the shapes of the imputed matrix for different strategies.
     X = np.random.randn(10, 2)
     X[::2] = np.nan
@@ -88,8 +88,8 @@ def test_imputation_shape():
 
 
 def test_imputation_mean_median_only_zero():
-    """Test imputation using the mean and median strategies, when
-    missing_values == 0."""
+    # Test imputation using the mean and median strategies, when
+    # missing_values == 0.
     X = np.array([
         [np.nan, 0, 0, 0, 5],
         [np.nan, 1, 0, np.nan, 3],
@@ -122,8 +122,8 @@ def test_imputation_mean_median_only_zero():
 
 
 def test_imputation_mean_median():
-    """Test imputation using the mean and median strategies, when
-    missing_values != 0."""
+    # Test imputation using the mean and median strategies, when
+    # missing_values != 0.
     rng = np.random.RandomState(0)
 
     dim = 10
@@ -192,8 +192,7 @@ def test_imputation_mean_median():
 
 
 def test_imputation_median_special_cases():
-    """Test median imputation with sparse boundary cases
-    """
+    # Test median imputation with sparse boundary cases
     X = np.array([
         [0, np.nan, np.nan],  # odd: implicit zero
         [5, np.nan, np.nan],  # odd: explicit nonzero
@@ -222,7 +221,7 @@ def test_imputation_median_special_cases():
 
 
 def test_imputation_most_frequent():
-    """Test imputation using the most-frequent strategy."""
+    # Test imputation using the most-frequent strategy.
     X = np.array([
         [-1, -1, 0, 5],
         [-1, 2, -1, 3],
@@ -245,7 +244,7 @@ def test_imputation_most_frequent():
 
 
 def test_imputation_pipeline_grid_search():
-    """Test imputation within a pipeline + gridsearch."""
+    # Test imputation within a pipeline + gridsearch.
     pipeline = Pipeline([('imputer', Imputer(missing_values=0)),
                          ('tree', tree.DecisionTreeRegressor(random_state=0))])
@@ -262,7 +261,7 @@ def test_imputation_pipeline_grid_search():
 
 
 def test_imputation_pickle():
-    """Test for pickling imputers."""
+    # Test for pickling imputers.
     import pickle
 
     l = 100
@@ -281,7 +280,7 @@ def test_imputation_pickle():
 
 
 def test_imputation_copy():
-    """Test imputation with copy"""
+    # Test imputation with copy
     X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0)
 
     # copy=True, dense => copy
diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index 77629d140f422..0f3fda7adec2e 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -152,7 +152,7 @@ def test_label_binarizer_set_label_encoding():
 
 @ignore_warnings
 def test_label_binarizer_errors():
-    """Check that invalid arguments yield ValueError"""
+    # Check that invalid arguments yield ValueError
     one_class = np.array([0, 0, 0, 0])
     lb = LabelBinarizer().fit(one_class)
@@ -191,7 +191,7 @@ def test_label_binarizer_errors():
 
 
 def test_label_encoder():
-    """Test LabelEncoder's transform and inverse_transform methods"""
+    # Test LabelEncoder's transform and inverse_transform methods
     le = LabelEncoder()
     le.fit([1, 1, 4, 5, -1, 0])
     assert_array_equal(le.classes_, [-1, 0, 1, 4, 5])
@@ -203,7 +203,7 @@ def test_label_encoder():
 
 
 def test_label_encoder_fit_transform():
-    """Test fit_transform"""
+    # Test fit_transform
     le = LabelEncoder()
     ret = le.fit_transform([1, 1, 4, 5, -1, 0])
     assert_array_equal(ret, [2, 2, 3, 4, 0, 1])
@@ -214,7 +214,7 @@ def test_label_encoder_fit_transform():
 
 
 def test_label_encoder_errors():
-    """Check that invalid arguments yield ValueError"""
+    # Check that invalid arguments yield ValueError
     le = LabelEncoder()
     assert_raises(ValueError, le.transform, [])
     assert_raises(ValueError, le.inverse_transform, [])
@@ -333,8 +333,7 @@ def test_multilabel_binarizer_given_classes():
 
 
 def test_multilabel_binarizer_same_length_sequence():
-    """Ensure sequences of the same length are not interpreted as a 2-d array
-    """
+    # Ensure sequences of the same length are not interpreted as a 2-d array
     inp = [[1], [0], [2]]
     indicator_mat = np.array([[0, 1, 0],
                               [1, 0, 0],
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index d1b93d5299da2..eff744241ccdd 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -39,7 +39,7 @@
 
 
 def test_svc():
-    """Check that sparse SVC gives the same result as SVC"""
+    # Check that sparse SVC gives the same result as SVC
     clf = svm.SVC(kernel='linear', probability=True, random_state=0)
     clf.fit(X, Y)
@@ -114,7 +114,7 @@ def test_svc_with_custom_kernel():
 
 
 def test_svc_iris():
-    """Test the sparse SVC with the iris dataset"""
+    # Test the sparse SVC with the iris dataset
     for k in ('linear', 'poly', 'rbf'):
         sp_clf = svm.SVC(kernel=k).fit(iris.data, iris.target)
         clf = svm.SVC(kernel=k).fit(iris.data.toarray(), iris.target)
@@ -129,9 +129,7 @@ def test_svc_iris():
 
 
 def test_error():
-    """
-    Test that it gives proper exception on deficient input
-    """
+    # Test that it gives proper exception on deficient input
     # impossible value of C
     assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)
@@ -148,9 +146,7 @@ def test_error():
 
 
 def test_linearsvc():
-    """
-    Similar to test_SVC
-    """
+    # Similar to test_SVC
     clf = svm.LinearSVC(random_state=0).fit(X, Y)
     sp_clf = svm.LinearSVC(random_state=0).fit(X_sp, Y)
@@ -169,7 +165,7 @@ def test_linearsvc():
 
 
 def test_linearsvc_iris():
-    """Test the sparse LinearSVC with the iris dataset"""
+    # Test the sparse LinearSVC with the iris dataset
     sp_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
     clf = svm.LinearSVC(random_state=0).fit(iris.data.toarray(), iris.target)
@@ -194,9 +190,7 @@ def test_linearsvc_iris():
 
 
 def test_weight():
-    """
-    Test class weights
-    """
+    # Test class weights
 
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  weights=[0.833, 0.167], random_state=0)
@@ -211,9 +205,7 @@ def test_weight():
 
 
 def test_sample_weights():
-    """
-    Test weights on individual samples
-    """
+    # Test weights on individual samples
     clf = svm.SVC()
     clf.fit(X_sp, Y)
     assert_array_equal(clf.predict(X[2]), [1.])
@@ -224,19 +216,14 @@ def test_sample_weights():
 
 
 def test_sparse_liblinear_intercept_handling():
-    """
-    Test that sparse liblinear honours intercept_scaling param
-    """
+    # Test that sparse liblinear honours intercept_scaling param
     test_svm.test_dense_liblinear_intercept_handling(svm.LinearSVC)
 
 
 def test_sparse_realdata():
-    """
-    Test on a subset from the 20newsgroups dataset.
-
-    This catchs some bugs if input is not correctly converted into
-    sparse format or weights are not correctly initialized.
-    """
+    # Test on a subset from the 20newsgroups dataset.
+    # This catches some bugs if input is not correctly converted into
+    # sparse format or weights are not correctly initialized.
 
     data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
     indices = np.array([6, 5, 35, 31])
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 65fcbee63a2cd..08cb2d5c9ca83 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -37,9 +37,7 @@
 
 
 def test_libsvm_parameters():
-    """
-    Test parameters on classes that make use of libsvm.
-    """
+    # Test parameters on classes that make use of libsvm.
     clf = svm.SVC(kernel='linear').fit(X, Y)
     assert_array_equal(clf.dual_coef_, [[-0.25, .25]])
     assert_array_equal(clf.support_, [1, 3])
@@ -49,7 +47,7 @@ def test_libsvm_parameters():
 
 
 def test_libsvm_iris():
-    """Check consistency on dataset iris."""
+    # Check consistency on dataset iris.
 
     # shuffle the dataset so that labels are not ordered
     for k in ('linear', 'rbf'):
@@ -85,9 +83,7 @@ def test_libsvm_iris():
 
 
 def test_single_sample_1d():
-    """
-    Test whether SVCs work on a single sample given as a 1-d array
-    """
+    # Test whether SVCs work on a single sample given as a 1-d array
 
     clf = svm.SVC().fit(X, Y)
     clf.predict(X[0])
@@ -97,11 +93,8 @@ def test_single_sample_1d():
 
 
 def test_precomputed():
-    """
-    SVC with a precomputed kernel.
-
-    We test it with a toy dataset and with iris.
-    """
+    # SVC with a precomputed kernel.
+    # We test it with a toy dataset and with iris.
     clf = svm.SVC(kernel='precomputed')
     # Gram matrix for train data (square matrix)
     # (we use just a linear kernel)
@@ -170,9 +163,7 @@ def test_precomputed():
 
 
 def test_svr():
-    """
-    Test Support Vector Regression
-    """
+    # Test Support Vector Regression
 
     diabetes = datasets.load_diabetes()
     for clf in (svm.NuSVR(kernel='linear', nu=.4, C=1.0),
@@ -215,9 +206,7 @@ def test_svr_errors():
 
 
 def test_oneclass():
-    """
-    Test OneClassSVM
-    """
+    # Test OneClassSVM
     clf = svm.OneClassSVM()
     clf.fit(X)
     pred = clf.predict(T)
@@ -231,9 +220,7 @@ def test_oneclass():
 
 
 def test_oneclass_decision_function():
-    """
-    Test OneClassSVM decision function
-    """
+    # Test OneClassSVM decision function
 
     clf = svm.OneClassSVM()
     rnd = check_random_state(2)
@@ -263,16 +250,12 @@ def test_oneclass_decision_function():
 
 
 def test_tweak_params():
-    """
-    Make sure some tweaking of parameters works.
-
-    We change clf.dual_coef_ at run time and expect .predict() to change
-    accordingly. Notice that this is not trivial since it involves a lot
-    of C/Python copying in the libsvm bindings.
-
-    The success of this test ensures that the mapping between libsvm and
-    the python classifier is complete.
-    """
+    # Make sure some tweaking of parameters works.
+    # We change clf.dual_coef_ at run time and expect .predict() to change
+    # accordingly. Notice that this is not trivial since it involves a lot
+    # of C/Python copying in the libsvm bindings.
+    # The success of this test ensures that the mapping between libsvm and
+    # the python classifier is complete.
     clf = svm.SVC(kernel='linear', C=1.0)
     clf.fit(X, Y)
     assert_array_equal(clf.dual_coef_, [[-.25, .25]])
@@ -282,11 +265,8 @@
 
 
 def test_probability():
-    """
-    Predict probabilities using SVC
-
-    This uses cross validation, so we use a slightly bigger testing set.
-    """
+    # Predict probabilities using SVC
+    # This uses cross validation, so we use a slightly bigger testing set.
 
     for clf in (svm.SVC(probability=True, random_state=0, C=1.0),
                 svm.NuSVC(probability=True, random_state=0)):
@@ -303,14 +283,10 @@
                               np.exp(clf.predict_log_proba(iris.data)), 8)
 
 
-def test_svc_decision_function():
-    """
-    Test SVC's decision_function
-
-    Sanity check, test that decision_function implemented in python
-    returns the same as the one in libsvm
-
-    """
+def test_decision_function():
+    # Test decision_function
+    # Sanity check, test that decision_function implemented in python
+    # returns the same as the one in libsvm
     # multi class:
     clf = svm.SVC(kernel='linear', C=0.1).fit(iris.data, iris.target)
@@ -339,13 +315,9 @@
 
 
 def test_svr_decision_function():
-    """
-    Test SVR's decision_function
-
-    Sanity check, test that decision_function implemented in python
-    returns the same as the one in libsvm
-
-    """
+    # Test SVR's decision_function
+    # Sanity check, test that decision_function implemented in python
+    # returns the same as the one in libsvm
 
     X = iris.data
     y = iris.target
@@ -365,9 +337,7 @@
 
 
 def test_weight():
-    """
-    Test class weights
-    """
+    # Test class weights
     clf = svm.SVC(class_weight={1: 0.1})
     # we give a small weights to class 1
     clf.fit(X, Y)
@@ -386,9 +356,7 @@
 
 
 def test_sample_weights():
-    """
-    Test weights on individual samples
-    """
+    # Test weights on individual samples
     # TODO: check on NuSVR, OneClass, etc.
     clf = svm.SVC()
     clf.fit(X, Y)
@@ -408,7 +376,7 @@ def test_sample_weights():
 
 
 def test_auto_weight():
-    """Test class weights for imbalanced data"""
+    # Test class weights for imbalanced data
     from sklearn.linear_model import LogisticRegression
     # We take as dataset the two-dimensional projection of iris so
     # that it is not separable and remove half of predictors from
@@ -435,9 +403,7 @@ def test_auto_weight():
 
 
 def test_bad_input():
-    """
-    Test that it gives proper exception on deficient input
-    """
+    # Test that it gives proper exception on deficient input
     # impossible value of C
     assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)
@@ -491,9 +457,7 @@ def test_sparse_precomputed():
 
 
 def test_linearsvc_parameters():
-    """
-    Test possible parameter combinations in LinearSVC
-    """
+    # Test possible parameter combinations in LinearSVC
     # Generate list of possible parameter combinations
     losses = ['hinge', 'squared_hinge', 'logistic_regression', 'foo']
     penalties, duals = ['l1', 'l2', 'bar'], [True, False]
@@ -582,9 +546,7 @@ def test_linear_svx_uppercase_loss_penalty():
 
 
 def test_linearsvc():
-    """
-    Test basic routines using LinearSVC
-    """
+    # Test basic routines using LinearSVC
     clf = svm.LinearSVC(random_state=0).fit(X, Y)
 
     # by default should have intercept
@@ -613,7 +575,7 @@ def test_linearsvc():
 
 
 def test_linearsvc_crammer_singer():
-    """Test LinearSVC with crammer_singer multi-class svm"""
+    # Test LinearSVC with crammer_singer multi-class svm
     ovr_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
     cs_clf = svm.LinearSVC(multi_class='crammer_singer', random_state=0)
     cs_clf.fit(iris.data, iris.target)
@@ -633,7 +595,7 @@ def test_linearsvc_crammer_singer():
 
 
 def test_crammer_singer_binary():
-    """Test Crammer-Singer formulation in the binary case"""
+    # Test Crammer-Singer formulation in the binary case
     X, y = make_classification(n_classes=2, random_state=0)
 
     for fit_intercept in (True, False):
@@ -645,11 +607,8 @@ def test_crammer_singer_binary():
 
 
 def test_linearsvc_iris():
-    """
-    Test that LinearSVC gives plausible predictions on the iris dataset
-
-    Also, test symbolic class names (classes_).
-    """
+    # Test that LinearSVC gives plausible predictions on the iris dataset
+    # Also, test symbolic class names (classes_).
     target = iris.target_names[iris.target]
     clf = svm.LinearSVC(random_state=0).fit(iris.data, target)
     assert_equal(set(clf.classes_), set(iris.target_names))
@@ -661,9 +620,7 @@ def test_linearsvc_iris():
 
 
 def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC):
-    """
-    Test that dense liblinear honours intercept_scaling param
-    """
+    # Test that dense liblinear honours intercept_scaling param
     X = [[2, 1],
          [3, 1],
          [1, 3],
@@ -720,7 +677,7 @@ def test_liblinear_set_coef():
 
 
 def test_immutable_coef_property():
-    """Check that primal coef modification are not silently ignored"""
+    # Check that primal coef modifications are not silently ignored
     svms = [
         svm.SVC(kernel='linear').fit(iris.data, iris.target),
         svm.NuSVC(kernel='linear').fit(iris.data, iris.target),
@@ -819,7 +776,7 @@ def test_consistent_proba():
 
 
 def test_linear_svc_convergence_warnings():
-    """Test that warnings are raised if model does not converge"""
+    # Test that warnings are raised if model does not converge
 
     lsvc = svm.LinearSVC(max_iter=2, verbose=1)
     assert_warns(ConvergenceWarning, lsvc.fit, X, Y)
@@ -827,7 +784,7 @@ def test_linear_svc_convergence_warnings():
 
 
 def test_svr_coef_sign():
-    """Test that SVR(kernel="linear") has coef_ with the right sign."""
+    # Test that SVR(kernel="linear") has coef_ with the right sign.
     # Non-regression test for #2933.
     X = np.random.RandomState(21).randn(10, 3)
     y = np.random.RandomState(12).randn(10)
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index d0cd0050fd8f8..59ad4e6ab5af6 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -1,4 +1,3 @@
-
 # Author: Gael Varoquaux
 # License: BSD 3 clause
 
@@ -81,13 +80,11 @@ def __init__(self, *vargs):
 # The tests
 
 def test_clone():
-    """Tests that clone creates a correct deep copy.
-
-    We create an estimator, make a copy of its original state
-    (which, in this case, is the current state of the estimator),
-    and check that the obtained copy is a correct deep copy.
+    # Tests that clone creates a correct deep copy.
+    # We create an estimator, make a copy of its original state
+    # (which, in this case, is the current state of the estimator),
+    # and check that the obtained copy is a correct deep copy.
 
-    """
     from sklearn.feature_selection import SelectFpr, f_classif
 
     selector = SelectFpr(f_classif, alpha=0.1)
@@ -101,12 +98,11 @@ def test_clone():
 
 
 def test_clone_2():
-    """Tests that clone doesn't copy everything.
+    # Tests that clone doesn't copy everything.
+    # We first create an estimator, give it an own attribute, and
+    # make a copy of its original state. Then we check that the copy doesn't
+    # have the specific attribute we manually added to the initial estimator.
 
-    We first create an estimator, give it an own attribute, and
-    make a copy of its original state. Then we check that the copy doesn't
-    have the specific attribute we manually added to the initial estimator.
-    """
     from sklearn.feature_selection import SelectFpr, f_classif
 
     selector = SelectFpr(f_classif, alpha=0.1)
@@ -116,7 +112,7 @@ def test_clone_2():
 
 
 def test_clone_buggy():
-    """Check that clone raises an error on buggy estimators."""
+    # Check that clone raises an error on buggy estimators.
     buggy = Buggy()
     buggy.a = 2
     assert_raises(RuntimeError, clone, buggy)
@@ -129,7 +125,7 @@ def test_clone_buggy():
 
 
 def test_clone_empty_array():
-    """Regression test for cloning estimators with empty arrays"""
+    # Regression test for cloning estimators with empty arrays
     clf = MyEstimator(empty=np.array([]))
     clf2 = clone(clf)
     assert_array_equal(clf.empty, clf2.empty)
@@ -140,7 +136,7 @@ def test_clone_empty_array():
 
 
 def test_repr():
-    """Smoke test the repr of the base estimator."""
+    # Smoke test the repr of the base estimator.
     my_estimator = MyEstimator()
     repr(my_estimator)
     test = T(K(), K())
@@ -154,7 +150,7 @@ def test_repr():
 
 
 def test_str():
-    """Smoke test the str of the base estimator"""
+    # Smoke test the str of the base estimator
     my_estimator = MyEstimator()
     str(my_estimator)
@@ -200,9 +196,9 @@ def test_set_params():
     # non-existing parameter of pipeline
     assert_raises(ValueError, clf.set_params, svm__stupid_param=True)
     # we don't currently catch if the things in pipeline are estimators
-    #bad_pipeline = Pipeline([("bad", NoEstimator())])
-    #assert_raises(AttributeError, bad_pipeline.set_params,
-    #              bad__stupid_param=True)
+    # bad_pipeline = Pipeline([("bad", NoEstimator())])
+    # assert_raises(AttributeError, bad_pipeline.set_params,
+    #               bad__stupid_param=True)
 
 
 def test_score_sample_weight():
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 043206323ff7d..3c25714857d37 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -248,7 +248,7 @@ def test_class_weight_classifiers():
 
 
 def test_class_weight_auto_classifiers():
-    """Test that class_weight="auto" improves f1-score"""
+    # Test that class_weight="auto" improves f1-score
 
     # This test is broken; its success depends on:
     # * a rare fortuitous RNG seed for make_classification; and
diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py
index 94946eff9f4fd..786bf561ec5e2 100644
--- a/sklearn/tests/test_cross_validation.py
+++ b/sklearn/tests/test_cross_validation.py
@@ -465,7 +465,7 @@ def assert_counts_are_ok(idx_counts, p):
 
 
 def test_predefinedsplit_with_kfold_split():
-    '''Check that PredefinedSplit can reproduce a split generated by Kfold.'''
+    # Check that PredefinedSplit can reproduce a split generated by Kfold.
     folds = -1 * np.ones(10)
     kf_train = []
     kf_test = []
@@ -484,8 +484,8 @@ def test_predefinedsplit_with_kfold_split():
 
 
 def test_leave_label_out_changing_labels():
-    """Check that LeaveOneLabelOut and LeavePLabelOut work normally if
-    the labels variable is changed before calling __iter__"""
+    # Check that LeaveOneLabelOut and LeavePLabelOut work normally if
+    # the labels variable is changed before calling __iter__
     labels = np.array([0, 1, 2, 1, 1, 2, 0, 0])
     labels_changing = np.array(labels, copy=True)
     lolo = cval.LeaveOneLabelOut(labels)
@@ -605,9 +605,9 @@ def test_cross_val_score_fit_params():
 DUMMY_OBJ = object()
 
 def assert_fit_params(clf):
-    """Function to test that the values are passed correctly to the
-    classifier arguments for non-array type
-    """
+    # Function to test that the values are passed correctly to the
+    # classifier arguments for non-array type
+
     assert_equal(clf.dummy_int, DUMMY_INT)
     assert_equal(clf.dummy_str, DUMMY_STR)
     assert_equal(clf.dummy_obj, DUMMY_OBJ)
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index 8fe2bdc6279c6..d9038187dfff2 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -93,7 +93,7 @@ def score(self):
 
 
 def test_parameter_grid():
-    """Test basic properties of ParameterGrid."""
+    # Test basic properties of ParameterGrid.
     params1 = {"foo": [1, 2, 3]}
     grid1 = ParameterGrid(params1)
     assert_true(isinstance(grid1, Iterable))
@@ -124,7 +124,7 @@ def test_parameter_grid():
 
 
 def test_grid_search():
-    """Test that the best estimator contains the right value for foo_param"""
+    # Test that the best estimator contains the right value for foo_param
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, verbose=3)
     # make sure it selects the smallest parameter in case of ties
@@ -205,10 +205,8 @@ def test_grid_search_score_method():
 
 
 def test_trivial_grid_scores():
-    """Test search over a "grid" with only one point.
-
-    Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
-    """
+    # Test search over a "grid" with only one point.
+    # Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1]})
     grid_search.fit(X, y)
@@ -220,7 +218,7 @@ def test_trivial_grid_scores():
 
 
 def test_no_refit():
-    """Test that grid search can be used for model selection only"""
+    # Test that grid search can be used for model selection only
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=False)
     grid_search.fit(X, y)
@@ -228,8 +226,8 @@ def test_no_refit():
 
 
 def test_grid_search_error():
-    """Test that grid search will capture errors on data with different
-    length"""
+    # Test that grid search will capture errors on data with different
+    # length
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
     clf = LinearSVC()
@@ -305,7 +303,7 @@ def test_grid_search_bad_param_grid():
 
 
 def test_grid_search_sparse():
-    """Test that grid search works with both dense and sparse matrices"""
+    # Test that grid search works with both dense and sparse matrices
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
     clf = LinearSVC()
@@ -344,8 +342,8 @@ def test_grid_search_sparse_scoring():
     assert_array_equal(y_pred, y_pred2)
     assert_equal(C, C2)
     # Smoke test the score
-    #np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
-    #                           cv.score(X_[:180], y[:180]))
+    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
+    #                            cv.score(X_[:180], y[:180]))
 
     # test loss where greater is worse
     def f1_loss(y_true_, y_pred_):
@@ -361,8 +359,8 @@ def f1_loss(y_true_, y_pred_):
 
 
 def test_grid_search_precomputed_kernel():
-    """Test that grid search works when the input features are given in the
-    form of a precomputed kernel matrix """
+    # Test that grid search works when the input features are given in the
+    # form of a precomputed kernel matrix
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
 
     # compute the training kernel matrix corresponding to the linear kernel
@@ -389,8 +387,8 @@ def test_grid_search_precomputed_kernel():
 
 
 def test_grid_search_precomputed_kernel_error_nonsquare():
-    """Test that grid search returns an error with a non-square precomputed
-    training kernel matrix"""
+    # Test that grid search returns an error with a non-square precomputed
+    # training kernel matrix
     K_train = np.zeros((10, 20))
     y_train = np.ones((10, ))
     clf = SVC(kernel='precomputed')
@@ -399,7 +397,7 @@ def test_grid_search_precomputed_kernel_error_nonsquare():
 
 
 def test_grid_search_precomputed_kernel_error_kernel_function():
-    """Test that grid search returns an error when using a kernel_function"""
+    # Test that grid search returns an error when using a kernel_function
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
     kernel_function = lambda x1, x2: np.dot(x1, x2.T)
     clf = SVC(kernel=kernel_function)
@@ -422,11 +420,9 @@ def predict(self, X):
 
 
 def test_refit():
-    """Regression test for bug in refitting
-
-    Simulates re-fitting a broken estimator; this used to break with
-    sparse SVMs.
-    """
+    # Regression test for bug in refitting
+    # Simulates re-fitting a broken estimator; this used to break with
+    # sparse SVMs.
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
@@ -436,7 +432,7 @@ def test_refit():
 
 
 def test_gridsearch_nd():
-    """Pass X as list in GridSearchCV"""
+    # Pass X as list in GridSearchCV
     X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
     y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
     check_X = lambda x: x.shape[1:] == (5, 3, 2)
@@ -448,7 +444,7 @@ def test_gridsearch_nd():
 
 
 def test_X_as_list():
-    """Pass X as list in GridSearchCV"""
+    # Pass X as list in GridSearchCV
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
@@ -460,7 +456,7 @@ def test_X_as_list():
 
 
 def test_y_as_list():
-    """Pass y as list in GridSearchCV"""
+    # Pass y as list in GridSearchCV
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
@@ -608,7 +604,7 @@ def test_grid_search_score_consistency():
 
 
 def test_pickle():
-    """Test that a fit search can be pickled"""
+    # Test that a fit search can be pickled
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
     grid_search.fit(X, y)
@@ -621,7 +617,7 @@ def test_pickle():
 
 
 def test_grid_search_with_multioutput_data():
-    """ Test search with multi-output estimator"""
+    # Test search with multi-output estimator
 
     X, y = make_multilabel_classification(return_indicator=True,
                                           random_state=0)
@@ -647,7 +643,8 @@ def test_grid_search_with_multioutput_data():
     # Test with a randomized search
     for est in estimators:
-        random_search = RandomizedSearchCV(est, est_parameters, cv=cv, n_iter=3)
+        random_search = RandomizedSearchCV(est, est_parameters,
+                                           cv=cv, n_iter=3)
         random_search.fit(X, y)
         for parameters, _, cv_validation_scores in random_search.grid_scores_:
             est.set_params(**parameters)
@@ -660,7 +657,7 @@ def test_grid_search_with_multioutput_data():
 
 
 def test_predict_proba_disabled():
-    """Test predict_proba when disabled on estimator."""
+    # Test predict_proba when disabled on estimator.
     X = np.arange(20).reshape(5, -1)
     y = [0, 0, 1, 1, 1]
     clf = SVC(probability=False)
@@ -669,7 +666,7 @@ def test_predict_proba_disabled():
 
 
 def test_grid_search_allows_nans():
-    """ Test GridSearchCV with Imputer """
+    # Test GridSearchCV with Imputer
     X = np.arange(20, dtype=np.float64).reshape(5, -1)
     X[2, :] = np.nan
     y = [0, 0, 1, 1, 1]
@@ -697,10 +694,8 @@ def predict(self, X):
 
 
 def test_grid_search_failing_classifier():
-    """GridSearchCV with on_error != 'raise'
-
-    Ensures that a warning is raised and score reset where appropriate.
-    """
+    # GridSearchCV with on_error != 'raise'
+    # Ensures that a warning is raised and score reset where appropriate.
     X, y = make_classification(n_samples=20, n_features=10,
                                random_state=0)
@@ -733,7 +728,7 @@ def test_grid_search_failing_classifier():
 
 
 def test_grid_search_failing_classifier_raise():
-    """GridSearchCV with on_error == 'raise' raises the error"""
+    # GridSearchCV with on_error == 'raise' raises the error
 
     X, y = make_classification(n_samples=20, n_features=10,
                                random_state=0)
@@ -764,7 +759,8 @@ def test_parameters_sampler_replacement():
     sampler = ParameterSampler(params, n_iter=99, random_state=42)
     samples = list(sampler)
     assert_equal(len(samples), 99)
-    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c']) for p in samples]
+    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
+                        for p in samples]
     assert_equal(len(set(hashable_samples)), 99)
 
     # doesn't go into infinite loops
diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py
index a7df69ee77b30..2c7d3b6b62e21 100644
--- a/sklearn/tests/test_init.py
+++ b/sklearn/tests/test_init.py
@@ -14,9 +14,7 @@
 
 
 def test_import_skl():
-    """Test either above import has failed for some reason
-
-    "import *" is discouraged outside of the module level, hence we
-    rely on setting up the variable above
-    """
+    # Test whether the above import has failed for some reason
+    # "import *" is discouraged outside of the module level, hence we
+    # rely on setting up the variable above
     assert_equal(_top_import_error, None)
diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py
index e24eda736e5e8..8469d67a1672a 100644
--- a/sklearn/tests/test_kernel_approximation.py
+++ b/sklearn/tests/test_kernel_approximation.py
@@ -22,7 +22,7 @@
 
 
 def test_additive_chi2_sampler():
-    """test that AdditiveChi2Sampler approximates kernel on random data"""
+    # test that AdditiveChi2Sampler approximates kernel on random data
 
     # compute exact kernel
     # appreviations for easier formular
@@ -80,7 +80,7 @@ def test_additive_chi2_sampler():
 
 
 def test_skewed_chi2_sampler():
-    """test that RBFSampler approximates kernel on random data"""
+    # test that SkewedChi2Sampler approximates kernel on random data
 
     # compute exact kernel
     c = 0.03
@@ -111,7 +111,7 @@ def test_skewed_chi2_sampler():
 
 
 def test_rbf_sampler():
-    """test that RBFSampler approximates kernel on random data"""
+    # test that RBFSampler approximates kernel on random data
     # compute exact kernel
     gamma = 10.
     kernel = rbf_kernel(X, Y, gamma=gamma)
@@ -130,10 +130,8 @@ def test_rbf_sampler():
 
 
 def test_input_validation():
-    """Regression test: kernel approx. transformers should work on lists
-
-    No assertions; the old versions would simply crash
-    """
+    # Regression test: kernel approx. transformers should work on lists
+    # No assertions; the old versions would simply crash
     X = [[1, 2], [3, 4], [5, 6]]
     AdditiveChi2Sampler().fit(X).transform(X)
     SkewedChi2Sampler().fit(X).transform(X)
@@ -188,7 +186,7 @@ def test_nystroem_singular_kernel():
 
 
 def test_nystroem_poly_kernel_params():
-    """Non-regression: Nystroem should pass other parameters beside gamma."""
+    # Non-regression: Nystroem should pass other parameters beside gamma.
     rnd = np.random.RandomState(37)
     X = rnd.uniform(size=(10, 4))
@@ -200,7 +198,7 @@ def test_nystroem_poly_kernel_params():
 
 
 def test_nystroem_callable():
-    """Test Nystroem on a callable."""
+    # Test Nystroem on a callable.
     rnd = np.random.RandomState(42)
     n_samples = 10
     X = rnd.uniform(size=(n_samples, 4))
diff --git a/sklearn/tests/test_lda.py b/sklearn/tests/test_lda.py
index 336ceb043dcb8..8cc5faf1b3094 100644
--- a/sklearn/tests/test_lda.py
+++ b/sklearn/tests/test_lda.py
@@ -25,11 +25,9 @@
 
 
 def test_lda_predict():
-    """Test LDA classification.
-
-    This checks that LDA implements fit and predict and returns correct values
-    for simple toy data.
-    """
+    # Test LDA classification.
+    # This checks that LDA implements fit and predict and returns correct values
+    # for simple toy data.
     for test_case in solver_shrinkage:
         solver, shrinkage = test_case
         clf = lda.LDA(solver=solver, shrinkage=shrinkage)
@@ -66,8 +64,7 @@ def test_lda_predict():
 
 
 def test_lda_coefs():
-    """Test if the coefficients of the solvers are approximately the same.
-    """
+    # Test if the coefficients of the solvers are approximately the same.
     n_features = 2
     n_classes = 2
     n_samples = 1000
@@ -88,8 +85,7 @@ def test_lda_coefs():
 
 
 def test_lda_transform():
-    """Test LDA transform.
-    """
+    # Test LDA transform.
     clf = lda.LDA(solver="svd", n_components=1)
     X_transformed = clf.fit(X, y).transform(X)
     assert_equal(X_transformed.shape[1], 1)
@@ -132,8 +128,7 @@ def test_lda_orthogonality():
 
 
 def test_lda_scaling():
-    """Test if classification works correctly with differently scaled features.
-    """
+    # Test if classification works correctly with differently scaled features.
     n = 100
     rng = np.random.RandomState(1234)
     # use uniform distribution of features to make sure there is absolutely no
diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py
index c0cee613a874c..adc56a4fa749d 100644
--- a/sklearn/tests/test_metaestimators.py
+++ b/sklearn/tests/test_metaestimators.py
@@ -44,7 +44,7 @@ def __init__(self, name, construct, skip_methods=(),
 
 
 def test_metaestimator_delegation():
-    """Ensures specified metaestimators have methods iff subestimator does"""
+    # Ensures specified metaestimators have methods iff subestimator does
     def hides(method):
         @property
         def wrapper(obj):
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index dad7363dde599..b783355007d7c 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -121,7 +121,7 @@ def test_ovr_fit_predict_sparse():
 
 
 def test_ovr_always_present():
-    """Test that ovr works with classes that are always present or absent."""
+    # Test that ovr works with classes that are always present or absent.
     # Note: tests is the case where _ConstantPredictor is utilised
     X = np.ones((10, 2))
     X[:5, :] = 0
diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py
index 658130e3ec75f..0e180b461b01a 100644
--- a/sklearn/tests/test_naive_bayes.py
+++ b/sklearn/tests/test_naive_bayes.py
@@ -32,12 +32,9 @@
 
 
 def test_gnb():
-    """
-    Gaussian Naive Bayes classification.
-
-    This checks that GaussianNB implements fit and predict and returns
-    correct values for a simple toy dataset.
-    """
+    # Gaussian Naive Bayes classification.
+    # This checks that GaussianNB implements fit and predict and returns
+    # correct values for a simple toy dataset.
 
     clf = GaussianNB()
     y_pred = clf.fit(X, y).predict(X)
@@ -54,7 +51,7 @@ def test_gnb():
 
 
 def test_gnb_prior():
-    """Test whether class priors are properly set. """
+    # Test whether class priors are properly set.
     clf = GaussianNB().fit(X, y)
     assert_array_almost_equal(np.array([3, 3]) / 6.0,
                               clf.class_prior_, 8)
@@ -96,7 +93,7 @@ def test_gnb_sample_weight():
 
 
 def test_discrete_prior():
-    """Test whether class priors are properly set. """
+    # Test whether class priors are properly set.
     for cls in [BernoulliNB, MultinomialNB]:
         clf = cls().fit(X2, y2)
         assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
@@ -104,11 +101,9 @@ def test_discrete_prior():
 
 
 def test_mnnb():
-    """Test Multinomial Naive Bayes classification.
-
-    This checks that MultinomialNB implements fit and predict and returns
-    correct values for a simple toy dataset.
-    """
+    # Test Multinomial Naive Bayes classification.
+    # This checks that MultinomialNB implements fit and predict and returns
+    # correct values for a simple toy dataset.
 
     for X in [X2, scipy.sparse.csr_matrix(X2)]:
         # Check the ability to predict the learning set.
@@ -188,7 +183,7 @@ def test_gnb_partial_fit():
 
 
 def test_discretenb_pickle():
-    """Test picklability of discrete naive Bayes classifiers"""
+    # Test picklability of discrete naive Bayes classifiers
 
     for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
         clf = cls().fit(X2, y2)
@@ -213,7 +208,7 @@ def test_discretenb_pickle():
 
 
 def test_input_check_fit():
-    """Test input checks for the fit method"""
+    # Test input checks for the fit method
     for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
         # check shape consistency for number of samples at fit time
         assert_raises(ValueError, cls().fit, X2, y2[:-1])
@@ -246,7 +241,7 @@ def test_input_check_partial_fit():
 
 
 def test_discretenb_predict_proba():
-    """Test discrete NB classes' probability scores"""
+    # Test discrete NB classes' probability scores
 
     # The 100s below distinguish Bernoulli from multinomial.
     # FIXME: write a test to show this.
@@ -277,8 +272,8 @@ def test_discretenb_predict_proba():
 
 
 def test_discretenb_uniform_prior():
-    """Test whether discrete NB classes fit a uniform prior
-    when fit_prior=False and class_prior=None"""
+    # Test whether discrete NB classes fit a uniform prior
+    # when fit_prior=False and class_prior=None
 
     for cls in [BernoulliNB, MultinomialNB]:
         clf = cls()
@@ -289,7 +284,7 @@ def test_discretenb_uniform_prior():
 
 
 def test_discretenb_provide_prior():
-    """Test whether discrete NB classes use provided prior"""
+    # Test whether discrete NB classes use provided prior
 
     for cls in [BernoulliNB, MultinomialNB]:
         clf = cls(class_prior=[0.5, 0.5])
@@ -304,8 +299,8 @@ def test_discretenb_provide_prior():
 
 
 def test_discretenb_provide_prior_with_partial_fit():
-    """Test whether discrete NB classes use provided prior
-    when using partial_fit"""
+    # Test whether discrete NB classes use provided prior
+    # when using partial_fit
 
     iris = load_iris()
     iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
@@ -363,10 +358,8 @@ def test_sample_weight_mnb():
 
 
 def test_coef_intercept_shape():
-    """coef_ and intercept_ should have shapes as in other linear models.
-
-    Non-regression test for issue #2127.
-    """
+    # coef_ and intercept_ should have shapes as in other linear models.
+    # Non-regression test for issue #2127.
     X = [[1, 0, 0], [1, 1, 1]]
     y = [1, 2]  # binary classification
@@ -408,13 +401,11 @@ def test_check_accuracy_on_digits():
 
 
 def test_feature_log_prob_bnb():
-    """Test for issue #4268.
-
-    Tests that the feature log prob value computed by BernoulliNB when
-    alpha=1.0 is equal to the expression given in Manning, Raghavan,
-    and Schuetze's "Introduction to Information Retrieval" book:
-    http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
-    """
+    # Test for issue #4268.
+    # Tests that the feature log prob value computed by BernoulliNB when
+    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
+    # and Schuetze's "Introduction to Information Retrieval" book:
+    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
 
     X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
     Y = np.array([0, 0, 1, 2, 2])
@@ -433,12 +424,10 @@ def test_feature_log_prob_bnb():
 
 
 def test_bnb():
-    """
-    Tests that BernoulliNB when alpha=1.0 gives the same values as
-    those given for the toy example in Manning, Raghavan, and
-    Schuetze's "Introduction to Information Retrieval" book:
-    http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
-    """
+    # Tests that BernoulliNB when alpha=1.0 gives the same values as
+    # those given for the toy example in Manning, Raghavan, and
+    # Schuetze's "Introduction to Information Retrieval" book:
+    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
 
     # Training data points are:
     # Chinese Beijing Chinese (class: China)
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index c57ac01664a6a..a44e358b2493f 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -12,7 +12,7 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
 
-from sklearn.base import BaseEstimator, clone
+from sklearn.base import clone
 from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression
@@ -55,6 +55,7 @@ def set_params(self, **params):
         self.a = params['a']
         return self
 
+
 class TransfT(T):
 
     def transform(self, X, y=None):
@@ -77,8 +78,7 @@ def predict(self, X):
 
 
 def test_pipeline_init():
-    """ Test the various init parameters of the pipeline.
-    """
+    # Test the various init parameters of the pipeline.
     assert_raises(TypeError, Pipeline)
     # Check that we can't instantiate pipelines with objects without fit
     # method
@@ -129,8 +129,7 @@ def test_pipeline_init():
 
 
 def test_pipeline_methods_anova():
-    """ Test the various methods of the pipeline (anova).
-    """
+    # Test the various methods of the pipeline (anova).
     iris = load_iris()
     X = iris.data
     y = iris.target
@@ -146,8 +145,7 @@ def test_pipeline_methods_anova():
 
 
 def test_pipeline_fit_params():
-    """Test that the pipeline can take fit parameters
-    """
+    # Test that the pipeline can take fit parameters
     pipe = Pipeline([('transf', TransfT()), ('clf', FitParamT())])
     pipe.fit(X=None, y=None, clf__should_succeed=True)
     # classifier should return True
@@ -158,7 +156,7 @@ def test_pipeline_fit_params():
 
 
 def test_pipeline_methods_pca_svm():
-    """Test the various methods of the pipeline (pca + svm)."""
+    # Test the various methods of the pipeline (pca + svm).
     iris = load_iris()
     X = iris.data
     y = iris.target
@@ -174,7 +172,7 @@ def test_pipeline_methods_pca_svm():
 
 
 def test_pipeline_methods_preprocessing_svm():
-    """Test the various methods of the pipeline (preprocessing + svm)."""
+    # Test the various methods of the pipeline (preprocessing + svm).
     iris = load_iris()
     X = iris.data
     y = iris.target
diff --git a/sklearn/tests/test_qda.py b/sklearn/tests/test_qda.py
index fc59f05527154..949d28920b721 100644
--- a/sklearn/tests/test_qda.py
+++ b/sklearn/tests/test_qda.py
@@ -32,12 +32,9 @@
 
 
 def test_qda():
-    """
-    QDA classification.
-
-    This checks that QDA implements fit and predict and returns
-    correct values for a simple toy dataset.
-    """
+    # QDA classification.
+    # This checks that QDA implements fit and predict and returns
+    # correct values for a simple toy dataset.
     clf = qda.QDA()
     y_pred = clf.fit(X, y).predict(X)
     assert_array_equal(y_pred, y)
diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py
index 7b2fb8157ac3a..e64fed90ece16 100644
--- a/sklearn/tests/test_random_projection.py
+++ b/sklearn/tests/test_random_projection.py
@@ -114,24 +114,24 @@ def check_input_with_sparse_random_matrix(random_matrix):
 
 
 def test_basic_property_of_random_matrix():
-    """Check basic properties of random matrix generation"""
+    # Check basic properties of random matrix generation
     for random_matrix in all_random_matrix:
-        check_input_size_random_matrix(random_matrix)
-        check_size_generated(random_matrix)
-        check_zero_mean_and_unit_norm(random_matrix)
+        yield check_input_size_random_matrix, random_matrix
+        yield check_size_generated, random_matrix
+        yield check_zero_mean_and_unit_norm, random_matrix
 
     for random_matrix in all_sparse_random_matrix:
-        check_input_with_sparse_random_matrix(random_matrix)
+        yield check_input_with_sparse_random_matrix, random_matrix
 
         random_matrix_dense = \
             lambda n_components, n_features, random_state: random_matrix(
                 n_components, n_features, random_state=random_state,
                 density=1.0)
-        check_zero_mean_and_unit_norm(random_matrix_dense)
+        yield check_zero_mean_and_unit_norm, random_matrix_dense
 
 
 def test_gaussian_random_matrix():
-    """Check some statical properties of Gaussian random matrix"""
+    # Check some statistical properties of Gaussian random matrix
     # Check that the random matrix follow the proper distribution.
     # Let's say that each element of a_{ij} of A is taken from
     # a_ij ~ N(0.0, 1 / n_components).
@@ -145,7 +145,7 @@ def test_gaussian_random_matrix():
 
 
 def test_sparse_random_matrix():
-    """Check some statical properties of sparse random matrix"""
+    # Check some statistical properties of sparse random matrix
     n_components = 100
     n_features = 500
diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py
index 30af1bb8f3b08..c66e6e225891e 100644
--- a/sklearn/tree/tests/test_export.py
+++ b/sklearn/tree/tests/test_export.py
@@ -17,7 +17,7 @@
 
 
 def test_graphviz_toy():
-    """Check correctness of export_graphviz"""
+    # Check correctness of export_graphviz
     clf = DecisionTreeClassifier(max_depth=3,
                                  min_samples_split=1,
                                  criterion="gini",
@@ -75,7 +75,7 @@ def test_graphviz_toy():
 
 
 def test_graphviz_errors():
-    """Check for errors of export_graphviz"""
+    # Check for errors of export_graphviz
     clf = DecisionTreeClassifier(max_depth=3, min_samples_split=1)
     clf.fit(X, y)
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index c8f68b21bbe58..0891db43010cd 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -182,7 +182,7 @@ def assert_tree_equal(d, s, message):
 
 
 def test_classification_toy():
-    """Check classification on a toy dataset."""
+    # Check classification on a toy dataset.
     for name, Tree in CLF_TREES.items():
         clf = Tree(random_state=0)
         clf.fit(X, y)
@@ -196,7 +196,7 @@ def test_classification_toy():
 
 
 def test_weighted_classification_toy():
-    """Check classification on a weighted toy dataset."""
+    # Check classification on a weighted toy dataset.
 
     for name, Tree in CLF_TREES.items():
         clf = Tree(random_state=0)
@@ -210,7 +210,7 @@ def test_weighted_classification_toy():
 
 
 def test_regression_toy():
-    """Check regression on a toy dataset."""
+    # Check regression on a toy dataset.
     for name, Tree in REG_TREES.items():
         reg = Tree(random_state=1)
         reg.fit(X, y)
@@ -224,7 +224,7 @@ def test_regression_toy():
 
 
 def test_xor():
-    """Check on a XOR problem"""
+    # Check on a XOR problem
     y = np.zeros((10, 10))
     y[:5, :5] = 1
     y[5:, 5:] = 1
@@ -247,7 +247,7 @@ def test_xor():
 
 
 def test_iris():
-    """Check consistency on dataset iris."""
+    # Check consistency on dataset iris.
     for (name, Tree), criterion in product(CLF_TREES.items(), CLF_CRITERIONS):
         clf = Tree(criterion=criterion, random_state=0)
         clf.fit(iris.data, iris.target)
@@ -265,7 +265,7 @@ def test_iris():
 
 
 def test_boston():
-    """Check consistency on dataset boston house prices."""
+    # Check consistency on dataset boston house prices.
 
     for (name, Tree), criterion in product(REG_TREES.items(), REG_CRITERIONS):
         reg = Tree(criterion=criterion, random_state=0)
@@ -286,7 +286,7 @@ def test_boston():
 
 
 def test_probability():
-    """Predict probabilities using DecisionTreeClassifier."""
+    # Predict probabilities using DecisionTreeClassifier.
 
     for name, Tree in CLF_TREES.items():
         clf = Tree(max_depth=1, max_features=1, random_state=42)
@@ -305,7 +305,7 @@ def test_probability():
 
 
 def test_arrayrepr():
-    """Check the array representation."""
+    # Check the array representation.
     # Check resize
     X = np.arange(10000)[:, np.newaxis]
     y = np.arange(10000)
@@ -316,7 +316,7 @@ def test_arrayrepr():
 
 
 def test_pure_set():
-    """Check when y is pure."""
+    # Check when y is pure.
     X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
     y = [1, 1, 1, 1, 1, 1]
@@ -334,7 +334,7 @@ def test_pure_set():
 
 
 def test_numerical_stability():
-    """Check numerical stability."""
+    # Check numerical stability.
     X = np.array([
         [152.08097839, 140.40744019, 129.75102234, 159.90493774],
         [142.50700378, 135.81935120, 117.82884979, 162.75781250],
@@ -357,7 +357,7 @@ def test_numerical_stability():
 
 
 def test_importances():
-    """Check variable importances."""
+    # Check variable importances.
     X, y = datasets.make_classification(n_samples=2000,
                                         n_features=10,
                                         n_informative=3,
@@ -393,13 +393,13 @@ def test_importances():
 
 @raises(ValueError)
 def test_importances_raises():
-    """Check if variable importance before fit raises ValueError. """
+    # Check if variable importance before fit raises ValueError.
     clf = DecisionTreeClassifier()
     clf.feature_importances_
 
 
 def test_importances_gini_equal_mse():
-    """Check that gini is equivalent to mse for binary output variable"""
+    # Check that gini is equivalent to mse for binary output variable
 
     X, y = datasets.make_classification(n_samples=2000,
                                         n_features=10,
@@ -425,7 +425,7 @@ def test_importances_gini_equal_mse():
 
 
 def test_max_features():
-    """Check max_features."""
+    # Check max_features.
     for name, TreeRegressor in REG_TREES.items():
         reg = TreeRegressor(max_features="auto")
         reg.fit(boston.data, boston.target)
@@ -490,7 +490,7 @@ def test_max_features():
 
 
 def test_error():
-    """Test that it gives proper exception on deficient input."""
+    # Test that it gives proper exception on deficient input.
     for name, TreeEstimator in CLF_TREES.items():
         # predict before fit
         est = TreeEstimator()
@@ -547,7 +547,7 @@ def test_error():
 
 
 def test_min_samples_leaf():
-    """Test if leaves contain more than leaf_count training examples"""
+    # Test if leaves contain more than leaf_count training examples
     X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE))
     y = iris.target
@@ -617,7 +617,7 @@ def test_min_weight_fraction_leaf():
 
 
 def test_pickle():
-    """Check that tree estimator are pickable """
+    # Check that tree estimators are picklable
     for name, TreeClassifier in CLF_TREES.items():
         clf = TreeClassifier(random_state=0)
         clf.fit(iris.data, iris.target)
@@ -646,7 +646,7 @@ def test_pickle():
 
 
 def test_multioutput():
-    """Check estimators on multi-output problems."""
+    # Check estimators on multi-output problems.
     X = [[-2, -1],
          [-1, -1],
         [-1, -2],
@@ -702,7 +702,7 @@ def test_multioutput():
 
 
 def test_classes_shape():
-    """Test that n_classes_ and classes_ have proper shape."""
+    # Test that n_classes_ and classes_ have proper shape.
     for name, TreeClassifier in CLF_TREES.items():
         # Classification, single output
         clf = TreeClassifier(random_state=0)
@@ -722,7 +722,7 @@ def test_classes_shape():
 
 
 def test_unbalanced_iris():
-    """Check class rebalancing."""
+    # Check class rebalancing.
     unbalanced_X = iris.data[:125]
     unbalanced_y = iris.target[:125]
     sample_weight = _balance_weights(unbalanced_y)
@@ -734,7 +734,7 @@ def test_unbalanced_iris():
 
 
 def test_memory_layout():
-    """Check that it works no matter the memory layout"""
+    # Check that it works no matter the memory layout
     for (name, TreeEstimator), dtype in product(ALL_TREES.items(),
                                                 [np.float64, np.float32]):
         est = TreeEstimator(random_state=0)
@@ -777,7 +777,7 @@ def test_memory_layout():
 
 
 def test_sample_weight():
-    """Check sample weighting."""
+    # Check sample weighting.
     # Test that zero-weighted samples are not taken into account
     X = np.arange(100)[:, np.newaxis]
     y = np.ones(100)
@@ -828,7 +828,7 @@ def test_sample_weight():
 
 
 def test_sample_weight_invalid():
-    """Check sample weighting raises errors."""
+    # Check sample weighting raises errors.
     X = np.arange(100)[:, np.newaxis]
     y = np.ones(100)
     y[:50] = 0.0
@@ -897,7 +897,7 @@ def test_class_weights():
 
 
 def check_class_weight_errors(name):
-    """Test if class_weight raises errors and warnings when expected."""
+    # Test if class_weight raises errors and warnings when expected.
     TreeClassifier = CLF_TREES[name]
     _y = np.vstack((y, np.array(y) * 2)).T
@@ -921,7 +921,7 @@ def test_class_weight_errors():
 
 
 def test_max_leaf_nodes():
-    """Test greedy trees with max_depth + 1 leafs. """
+    # Test greedy trees with max_depth + 1 leaves.
     from sklearn.tree._tree import TREE_LEAF
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     k = 4
@@ -940,7 +940,7 @@ def test_max_leaf_nodes():
 
 
 def test_max_leaf_nodes_max_depth():
-    """Test preceedence of max_leaf_nodes over max_depth. """
+    # Test precedence of max_leaf_nodes over max_depth.
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     k = 4
     for name, TreeEstimator in ALL_TREES.items():
@@ -950,10 +950,8 @@ def test_max_leaf_nodes_max_depth():


 def test_arrays_persist():
-    """Ensure property arrays' memory stays alive when tree disappears
-
-    non-regression for #2726
-    """
+    # Ensure property arrays' memory stays alive when tree disappears
+    # non-regression for #2726
     for attr in ['n_classes', 'value', 'children_left', 'children_right',
                  'threshold', 'impurity', 'feature', 'n_node_samples']:
         value = getattr(DecisionTreeClassifier().fit([[0]], [0]).tree_, attr)
@@ -991,7 +989,7 @@ def test_with_only_one_non_constant_features():


 def test_big_input():
-    """Test if the warning for too large inputs is appropriate."""
+    # Test if the warning for too large inputs is appropriate.
     X = np.repeat(10 ** 40., 4).astype(np.float64).reshape(-1, 1)
     clf = DecisionTreeClassifier()
     try:
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
index 920ce5893b95a..daf3cfbefb83b 100644
--- a/sklearn/utils/tests/test_class_weight.py
+++ b/sklearn/utils/tests/test_class_weight.py
@@ -11,7 +11,7 @@


 def test_compute_class_weight():
-    """Test (and demo) compute_class_weight."""
+    # Test (and demo) compute_class_weight.
     y = np.asarray([2, 2, 2, 3, 3, 4])
     classes = np.unique(y)
     cw = compute_class_weight("auto", classes, y)
@@ -20,14 +20,14 @@ def test_compute_class_weight():


 def test_compute_class_weight_not_present():
-    """Raise error when y does not contain all class labels"""
+    # Raise error when y does not contain all class labels
     classes = np.arange(4)
     y = np.asarray([0, 0, 0, 1, 1, 2])
     assert_raises(ValueError, compute_class_weight, "auto", classes, y)


 def test_compute_class_weight_auto_negative():
-    """Test compute_class_weight when labels are negative"""
+    # Test compute_class_weight when labels are negative
     # Test with balanced class labels.
     classes = np.array([-2, -1, 0])
     y = np.asarray([-1, -1, 0, 0, -2, -2])
@@ -45,7 +45,7 @@ def test_compute_class_weight_auto_negative():


 def test_compute_class_weight_auto_unordered():
-    """Test compute_class_weight when classes are unordered"""
+    # Test compute_class_weight when classes are unordered
     classes = np.array([1, 0, 3])
     y = np.asarray([1, 0, 0, 3, 3, 3])
     cw = compute_class_weight("auto", classes, y)
@@ -55,7 +55,7 @@ def test_compute_class_weight_auto_unordered():


 def test_compute_sample_weight():
-    """Test (and demo) compute_sample_weight."""
+    # Test (and demo) compute_sample_weight.
     # Test with balanced classes
     y = np.asarray([1, 1, 1, 2, 2, 2])
     sample_weight = compute_sample_weight("auto", y)
@@ -97,7 +97,7 @@ def test_compute_sample_weight():


 def test_compute_sample_weight_with_subsample():
-    """Test compute_sample_weight with subsamples specified."""
+    # Test compute_sample_weight with subsamples specified.
     # Test with balanced classes and all samples present
     y = np.asarray([1, 1, 1, 2, 2, 2])
     sample_weight = compute_sample_weight("auto", y, range(6))
@@ -136,7 +136,7 @@ def test_compute_sample_weight_with_subsample():


 def test_compute_sample_weight_errors():
-    """Test compute_sample_weight raises errors expected."""
+    # Test that compute_sample_weight raises the expected errors.
     # Invalid preset string
     y = np.asarray([1, 1, 1, 2, 2, 2])
     y_ = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index 22db7d74f5f94..7c84548b4dce3 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -91,7 +91,7 @@ def test_logsumexp():


 def test_randomized_svd_low_rank():
-    """Check that extmath.randomized_svd is consistent with linalg.svd"""
+    # Check that extmath.randomized_svd is consistent with linalg.svd
     n_samples = 100
     n_features = 500
     rank = 5
@@ -151,7 +151,7 @@ def test_row_norms():


 def test_randomized_svd_low_rank_with_noise():
-    """Check that extmath.randomized_svd can handle noisy matrices"""
+    # Check that extmath.randomized_svd can handle noisy matrices
     n_samples = 100
     n_features = 500
     rank = 5
@@ -183,7 +183,7 @@ def test_randomized_svd_low_rank_with_noise():


 def test_randomized_svd_infinite_rank():
-    """Check that extmath.randomized_svd can handle noisy matrices"""
+    # Check that extmath.randomized_svd can handle matrices of infinite rank
     n_samples = 100
     n_features = 500
     rank = 5
@@ -216,7 +216,7 @@ def test_randomized_svd_infinite_rank():


 def test_randomized_svd_transpose_consistency():
-    """Check that transposing the design matrix has limit impact"""
+    # Check that transposing the design matrix has limited impact
     n_samples = 100
     n_features = 500
     rank = 4
@@ -249,7 +249,7 @@ def test_randomized_svd_transpose_consistency():


 def test_svd_flip():
-    """Check that svd_flip works in both situations, and reconstructs input."""
+    # Check that svd_flip works in both situations, and reconstructs input.
     rs = np.random.RandomState(1999)
     n_samples = 20
     n_features = 10
@@ -286,7 +286,7 @@ def test_randomized_svd_sign_flip():


 def test_cartesian():
-    """Check if cartesian product delivers the right results"""
+    # Check if cartesian product delivers the right results

     axes = (np.array([1, 2, 3]), np.array([4, 5]),
             np.array([6, 7]))
@@ -312,7 +312,7 @@ def test_cartesian():


 def test_logistic_sigmoid():
-    """Check correctness and robustness of logistic sigmoid implementation"""
+    # Check correctness and robustness of logistic sigmoid implementation
     naive_logistic = lambda x: 1 / (1 + np.exp(-x))
     naive_log_logistic = lambda x: np.log(naive_logistic(x))

@@ -324,7 +324,7 @@ def test_logistic_sigmoid():


 def test_fast_dot():
-    """Check fast dot blas wrapper function"""
+    # Check the fast_dot BLAS wrapper function
     if fast_dot is np.dot:
         return

@@ -402,7 +402,7 @@ def test_fast_dot():


 def test_incremental_variance_update_formulas():
-    """Test Youngs and Cramer incremental variance formulas."""
+    # Test Youngs and Cramer incremental variance formulas.
     # Doggie data from http://www.mathsisfun.com/data/standard-deviation.html
     A = np.array([[600, 470, 170, 430, 300],
                   [600, 470, 170, 430, 300],
@@ -423,7 +423,7 @@ def test_incremental_variance_update_formulas():


 def test_incremental_variance_ddof():
-    """Test that degrees of freedom parameter for calculations are correct."""
+    # Test that degrees of freedom parameter for calculations are correct.
     rng = np.random.RandomState(1999)
     X = rng.randn(50, 10)
     n_samples, n_features = X.shape
diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py
index fde18d7c8999e..3580aa6acbff7 100644
--- a/sklearn/utils/tests/test_fixes.py
+++ b/sklearn/utils/tests/test_fixes.py
@@ -13,7 +13,7 @@


 def test_expit():
-    """Check numerical stability of expit (logistic function)."""
+    # Check numerical stability of expit (logistic function).

     # Simulate our previous Cython implementation, based on
     #http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index d5f6b695e04af..efd0d9ab7e920 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -209,7 +209,7 @@ def test_unique_labels():

 @ignore_warnings
 def test_unique_labels_non_specific():
-    """Test unique_labels with a variety of collected examples"""
+    # Test unique_labels with a variety of collected examples

     # Smoke test for all supported format
     for format in ["binary", "multiclass", "multilabel-sequences",
diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py
index cc07ed734c105..a2db52b78bec2 100644
--- a/sklearn/utils/tests/test_sparsefuncs.py
+++ b/sklearn/utils/tests/test_sparsefuncs.py
@@ -362,7 +362,7 @@ def test_count_nonzero():


 def test_csc_row_median():
-    """Test csc_row_median actually calculates the median."""
+    # Test csc_row_median actually calculates the median.

     # Test that it gives the same output when X is dense.
     rng = np.random.RandomState(0)
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index 34e560d08561a..8ad43b5dfc5a3 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -20,7 +20,7 @@


 def test_make_rng():
-    """Check the check_random_state utility function behavior"""
+    # Check the check_random_state utility function behavior
     assert_true(check_random_state(None) is np.random.mtrand._rand)
     assert_true(check_random_state(np.random) is np.random.mtrand._rand)

@@ -37,12 +37,12 @@ def test_make_rng():


 def test_resample_noarg():
-    """Border case not worth mentioning in doctests"""
+    # Border case not worth mentioning in doctests
     assert_true(resample() is None)


 def test_deprecated():
-    """Test whether the deprecated decorator issues appropriate warnings"""
+    # Test whether the deprecated decorator issues appropriate warnings
     # Copied almost verbatim from http://docs.python.org/library/warnings.html

     # First a function...
@@ -79,7 +79,7 @@ class Ham(object):


 def test_resample_value_errors():
-    """Check that invalid arguments yield ValueError"""
+    # Check that invalid arguments yield ValueError
     assert_raises(ValueError, resample, [0], [0, 1])
     assert_raises(ValueError, resample, [0, 1], [0, 1], n_samples=3)
     assert_raises(ValueError, resample, [0, 1], [0, 1], meaning_of_life=42)
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 0a35f76047ab5..b1317e3d5a5ef 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -30,7 +30,7 @@


 def test_as_float_array():
-    """Test function for as_float_array"""
+    # Test function for as_float_array
     X = np.ones((3, 10), dtype=np.int32)
     X = X + np.arange(10, dtype=np.int32)
     # Checks that the return type is ok
@@ -63,7 +63,7 @@ def test_as_float_array():


 def test_np_matrix():
-    """Confirm that input validation code does not return np.matrix"""
+    # Confirm that input validation code does not return np.matrix
     X = np.arange(12).reshape(3, 4)

     assert_false(isinstance(as_float_array(X), np.matrix))
@@ -72,7 +72,7 @@ def test_np_matrix():


 def test_memmap():
-    """Confirm that input validation code doesn't copy memory mapped arrays"""
+    # Confirm that input validation code doesn't copy memory mapped arrays

     asflt = lambda x: as_float_array(x, copy=False)

@@ -88,11 +88,9 @@ def test_memmap():


 def test_ordering():
-    """Check that ordering is enforced correctly by validation utilities.
-
-    We need to check each validation utility, because a 'copy' without
-    'order=K' will kill the ordering.
-    """
+    # Check that ordering is enforced correctly by validation utilities.
+    # We need to check each validation utility, because a 'copy' without
+    # 'order=K' will kill the ordering.
     X = np.ones((10, 5))
     for A in X, X.T:
         for copy in (True, False):