scikit-learn · ogrisel · Mar 23, 2015 · Mar 21, 2015 · Mar 21, 2015
diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py
@@ -21,7 +21,7 @@
 
 
 def test_affinity_propagation():
-    """Affinity Propagation algorithm """
+    # Affinity Propagation algorithm
     # Compute similarities
     S = -euclidean_distances(X, squared=True)
     preference = np.median(S) * 10
@@ -60,15 +60,15 @@ def test_affinity_propagation():
 
 
 def test_affinity_propagation_predict():
-    """Test AffinityPropagation.predict"""
+    # Test AffinityPropagation.predict
     af = AffinityPropagation(affinity="euclidean")
     labels = af.fit_predict(X)
     labels2 = af.predict(X)
     assert_array_equal(labels, labels2)
 
 
 def test_affinity_propagation_predict_error():
-    """Test exception in AffinityPropagation.predict"""
+    # Test exception in AffinityPropagation.predict
     # Not fitted.
     af = AffinityPropagation(affinity="euclidean")
     assert_raises(ValueError, af.predict, X)

diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py
@@ -55,7 +55,7 @@ def test_get_submatrix():
 
 
 def _test_shape_indices(model):
-    """Test get_shape and get_indices on fitted model."""
+    # Test get_shape and get_indices on fitted model.
     for i in range(model.n_clusters):
         m, n = model.get_shape(i)
         i_ind, j_ind = model.get_indices(i)
@@ -64,7 +64,7 @@ def _test_shape_indices(model):
 
 
 def test_spectral_coclustering():
-    """Test Dhillon's Spectral CoClustering on a simple problem."""
+    # Test Dhillon's Spectral CoClustering on a simple problem.
     param_grid = {'svd_method': ['randomized', 'arpack'],
                   'n_svd_vecs': [None, 20],
                   'mini_batch': [False, True],
@@ -93,7 +93,7 @@ def test_spectral_coclustering():
 
 
 def test_spectral_biclustering():
-    """Test Kluger methods on a checkerboard dataset."""
+    # Test Kluger methods on a checkerboard dataset.
     S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                       random_state=0)
 

diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py
@@ -22,7 +22,7 @@
 
 
 def test_n_samples_leaves_roots():
-    """Sanity check for the number of samples in leaves and roots"""
+    # Sanity check for the number of samples in leaves and roots
     X, y = make_blobs(n_samples=10)
     brc = Birch()
     brc.fit(X)
@@ -34,7 +34,7 @@ def test_n_samples_leaves_roots():
 
 
 def test_partial_fit():
-    """Test that fit is equivalent to calling partial_fit multiple times"""
+    # Test that fit is equivalent to calling partial_fit multiple times
     X, y = make_blobs(n_samples=100)
     brc = Birch(n_clusters=3)
     brc.fit(X)
@@ -52,7 +52,7 @@ def test_partial_fit():
 
 
 def test_birch_predict():
-    """Test the predict method predicts the nearest centroid."""
+    # Test the predict method predicts the nearest centroid.
     rng = np.random.RandomState(0)
     X = generate_clustered_data(n_clusters=3, n_features=3,
                                 n_samples_per_cluster=10)
@@ -70,7 +70,7 @@ def test_birch_predict():
 
 
 def test_n_clusters():
-    """Test that n_clusters param works properly"""
+    # Test that n_clusters param works properly
     X, y = make_blobs(n_samples=100, centers=10)
     brc1 = Birch(n_clusters=10)
     brc1.fit(X)
@@ -96,7 +96,7 @@ def test_n_clusters():
 
 
 def test_sparse_X():
-    """Test that sparse and dense data give same results"""
+    # Test that sparse and dense data give same results
     X, y = make_blobs(n_samples=100, centers=10)
     brc = Birch(n_clusters=10)
     brc.fit(X)
@@ -119,7 +119,7 @@ def check_branching_factor(node, branching_factor):
 
 
 def test_branching_factor():
-    """Test that nodes have at max branching_factor number of subclusters"""
+    # Test that nodes have at most branching_factor subclusters
     X, y = make_blobs()
     branching_factor = 9
 
@@ -149,7 +149,7 @@ def check_threshold(birch_instance, threshold):
 
 
 def test_threshold():
-    """Test that the leaf subclusters have a threshold lesser than radius"""
+    # Test that the leaf subclusters have a radius less than the threshold
     X, y = make_blobs(n_samples=80, centers=4)
     brc = Birch(threshold=0.5, n_clusters=None)
     brc.fit(X)

diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py
@@ -25,7 +25,7 @@
 
 
 def test_dbscan_similarity():
-    """Tests the DBSCAN algorithm with a similarity array."""
+    # Tests the DBSCAN algorithm with a similarity array.
     # Parameters chosen specifically for this task.
     eps = 0.15
     min_samples = 10
@@ -48,7 +48,7 @@ def test_dbscan_similarity():
 
 
 def test_dbscan_feature():
-    """Tests the DBSCAN algorithm with a feature vector array."""
+    # Tests the DBSCAN algorithm with a feature vector array.
     # Parameters chosen specifically for this task.
     # Different eps to other test, because distance is not normalised.
     eps = 0.8
@@ -91,7 +91,7 @@ def test_dbscan_no_core_samples():
 
 
 def test_dbscan_callable():
-    """Tests the DBSCAN algorithm with a callable metric."""
+    # Tests the DBSCAN algorithm with a callable metric.
     # Parameters chosen specifically for this task.
     # Different eps to other test, because distance is not normalised.
     eps = 0.8
@@ -117,7 +117,7 @@ def test_dbscan_callable():
 
 
 def test_dbscan_balltree():
-    """Tests the DBSCAN algorithm with balltree for neighbor calculation."""
+    # Tests the DBSCAN algorithm with balltree for neighbor calculation.
     eps = 0.8
     min_samples = 10
 
@@ -156,13 +156,13 @@ def test_dbscan_balltree():
 
 
 def test_input_validation():
-    """DBSCAN.fit should accept a list of lists."""
+    # DBSCAN.fit should accept a list of lists.
     X = [[1., 2.], [3., 4.]]
     DBSCAN().fit(X)             # must not raise exception
 
 
 def test_dbscan_badargs():
-    """Test bad argument values: these should all raise ValueErrors"""
+    # Test bad argument values: these should all raise ValueErrors
     assert_raises(ValueError,
                   dbscan,
                   X, eps=-1.0)

diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py
@@ -58,9 +58,7 @@ def test_linkage_misc():
 
 
 def test_structured_linkage_tree():
-    """
-    Check that we obtain the correct solution for structured linkage trees.
-    """
+    # Check that we obtain the correct solution for structured linkage trees.
     rng = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
     # Avoiding a mask with only 'True' entries
@@ -82,9 +80,7 @@ def test_structured_linkage_tree():
 
 
 def test_unstructured_linkage_tree():
-    """
-    Check that we obtain the correct solution for unstructured linkage trees.
-    """
+    # Check that we obtain the correct solution for unstructured linkage trees.
     rng = np.random.RandomState(0)
     X = rng.randn(50, 100)
     for this_X in (X, X[0]):
@@ -107,9 +103,7 @@ def test_unstructured_linkage_tree():
 
 
 def test_height_linkage_tree():
-    """
-    Check that the height of the results of linkage tree is sorted.
-    """
+    # Check that the height of the results of linkage tree is sorted.
     rng = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
     X = rng.randn(50, 100)
@@ -121,10 +115,8 @@ def test_height_linkage_tree():
 
 
 def test_agglomerative_clustering():
-    """
-    Check that we obtain the correct number of clusters with
-    agglomerative clustering.
-    """
+    # Check that we obtain the correct number of clusters with
+    # agglomerative clustering.
     rng = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
     n_samples = 100
@@ -211,9 +203,7 @@ def test_agglomerative_clustering():
 
 
 def test_ward_agglomeration():
-    """
-    Check that we obtain the correct solution in a simplistic case
-    """
+    # Check that we obtain the correct solution in a simplistic case
     rng = np.random.RandomState(0)
     mask = np.ones([10, 10], dtype=np.bool)
     X = rng.randn(50, 100)
@@ -245,8 +235,7 @@ def assess_same_labelling(cut1, cut2):
 
 
 def test_scikit_vs_scipy():
-    """Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
-    """
+    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
     n, p, k = 10, 5, 3
     rng = np.random.RandomState(0)
 
@@ -273,10 +262,8 @@ def test_scikit_vs_scipy():
 
 
 def test_connectivity_propagation():
-    """
-    Check that connectivity in the ward tree is propagated correctly during
-    merging.
-    """
+    # Check that connectivity in the ward tree is propagated correctly during
+    # merging.
     X = np.array([(.014, .120), (.014, .099), (.014, .097),
                   (.017, .153), (.017, .153), (.018, .153),
                   (.018, .153), (.018, .153), (.018, .153),
@@ -291,10 +278,8 @@ def test_connectivity_propagation():
 
 
 def test_ward_tree_children_order():
-    """
-    Check that children are ordered in the same way for both structured and
-    unstructured versions of ward_tree.
-    """
+    # Check that children are ordered in the same way for both structured and
+    # unstructured versions of ward_tree.
 
     # test on five random datasets
     n, p = 10, 5
@@ -313,7 +298,7 @@ def test_ward_tree_children_order():
 
 
 def test_ward_linkage_tree_return_distance():
-    """Test return_distance option on linkage and ward trees"""
+    # Test return_distance option on linkage and ward trees
 
     # test that return_distance when set true, gives same
     # output on both structured and unstructured clustering.
@@ -420,10 +405,8 @@ def test_ward_linkage_tree_return_distance():
 
 
 def test_connectivity_fixing_non_lil():
-    """
-    Check non regression of a bug if a non item assignable connectivity is
-    provided with more than one component.
-    """
+    # Check non regression of a bug if a non item assignable connectivity is
+    # provided with more than one component.
     # create dummy data
     x = np.array([[0, 0], [1, 1]])
     # create a mask with several components to force connectivity fixing
@@ -475,7 +458,7 @@ def test_connectivity_ignores_diagonal():
 
 
 def test_compute_full_tree():
-    """Test that the full tree is computed if n_clusters is small"""
+    # Test that the full tree is computed if n_clusters is small
     rng = np.random.RandomState(0)
     X = rng.randn(10, 2)
     connectivity = kneighbors_graph(X, 5, include_self=False)
@@ -502,7 +485,7 @@ def test_compute_full_tree():
 
 
 def test_n_components():
-    """Test n_components returned by linkage, average and ward tree"""
+    # Test n_components returned by linkage, average and ward tree
     rng = np.random.RandomState(0)
     X = rng.rand(5, 5)
 

diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
@@ -81,7 +81,7 @@ def test_labels_assignment_and_inertia():
 
 
 def test_minibatch_update_consistency():
-    """Check that dense and sparse minibatch update give the same results"""
+    # Check that dense and sparse minibatch update give the same results
     rng = np.random.RandomState(42)
     old_centers = centers + rng.normal(size=centers.shape)
 
@@ -480,7 +480,7 @@ def test_mini_match_k_means_invalid_init():
 
 
 def test_k_means_copyx():
-    """Check if copy_x=False returns nearly equal X after de-centering."""
+    # Check if copy_x=False returns nearly equal X after de-centering.
     my_X = X.copy()
     km = KMeans(copy_x=False, n_clusters=n_clusters, random_state=42)
     km.fit(my_X)
@@ -491,13 +491,11 @@ def test_k_means_copyx():
 
 
 def test_k_means_non_collapsed():
-    """Check k_means with a bad initialization does not yield a singleton
-
-    Starting with bad centers that are quickly ignored should not
-    result in a repositioning of the centers to the center of mass that
-    would lead to collapsed centers which in turns make the clustering
-    dependent of the numerical unstabilities.
-    """
+    # Check k_means with a bad initialization does not yield a singleton.
+    # Starting with bad centers that are quickly ignored should not
+    # result in a repositioning of the centers to the center of mass that
+    # would lead to collapsed centers which in turn make the clustering
+    # dependent on numerical instabilities.
     my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]])
     array_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]])
     km = KMeans(init=array_init, n_clusters=3, random_state=42, n_init=1)
@@ -630,7 +628,7 @@ def test_fit_transform():
 
 
 def test_n_init():
-    """Check that increasing the number of init increases the quality"""
+    # Check that increasing the number of init increases the quality
     n_runs = 5
     n_init_range = [1, 5, 10]
     inertia = np.zeros((len(n_init_range), n_runs))

diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py
@@ -25,13 +25,13 @@
 
 
 def test_estimate_bandwidth():
-    """Test estimate_bandwidth"""
+    # Test estimate_bandwidth
     bandwidth = estimate_bandwidth(X, n_samples=200)
     assert_true(0.9 <= bandwidth <= 1.5)
 
 
 def test_mean_shift():
-    """ Test MeanShift algorithm """
+    # Test MeanShift algorithm
     bandwidth = 1.2
 
     ms = MeanShift(bandwidth=bandwidth)
@@ -47,7 +47,7 @@ def test_mean_shift():
 
 
 def test_meanshift_predict():
-    """Test MeanShift.predict"""
+    # Test MeanShift.predict
     ms = MeanShift(bandwidth=1.2)
     labels = ms.fit_predict(X)
     labels2 = ms.predict(X)
@@ -62,17 +62,15 @@ def test_meanshift_all_orphans():
 
 
 def test_unfitted():
-    """Non-regression: before fit, there should be not fitted attributes."""
+    # Non-regression: before fit, there should be no fitted attributes.
     ms = MeanShift()
     assert_false(hasattr(ms, "cluster_centers_"))
     assert_false(hasattr(ms, "labels_"))
 
 
 def test_bin_seeds():
-    """
-    Test the bin seeding technique which can be used in the mean shift
-    algorithm
-    """
+    # Test the bin seeding technique which can be used in the mean shift
+    # algorithm
     # Data is just 6 points in the plane
     X = np.array([[1., 1.], [1.4, 1.4], [1.8, 1.2],
                   [2., 1.], [2.1, 1.1], [0., 0.]])