diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index c823f0c3dcd10..e0e4091d4d2de 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -21,7 +21,7 @@ def test_affinity_propagation(): - """Affinity Propagation algorithm """ + # Affinity Propagation algorithm # Compute similarities S = -euclidean_distances(X, squared=True) preference = np.median(S) * 10 @@ -60,7 +60,7 @@ def test_affinity_propagation(): def test_affinity_propagation_predict(): - """Test AffinityPropagation.predict""" + # Test AffinityPropagation.predict af = AffinityPropagation(affinity="euclidean") labels = af.fit_predict(X) labels2 = af.predict(X) @@ -68,7 +68,7 @@ def test_affinity_propagation_predict(): def test_affinity_propagation_predict_error(): - """Test exception in AffinityPropagation.predict""" + # Test exception in AffinityPropagation.predict # Not fitted. af = AffinityPropagation(affinity="euclidean") assert_raises(ValueError, af.predict, X) diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 31513bfdfc641..9afcca5ff0eec 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -55,7 +55,7 @@ def test_get_submatrix(): def _test_shape_indices(model): - """Test get_shape and get_indices on fitted model.""" + # Test get_shape and get_indices on fitted model. for i in range(model.n_clusters): m, n = model.get_shape(i) i_ind, j_ind = model.get_indices(i) @@ -64,7 +64,7 @@ def _test_shape_indices(model): def test_spectral_coclustering(): - """Test Dhillon's Spectral CoClustering on a simple problem.""" + # Test Dhillon's Spectral CoClustering on a simple problem. param_grid = {'svd_method': ['randomized', 'arpack'], 'n_svd_vecs': [None, 20], 'mini_batch': [False, True], @@ -93,7 +93,7 @@ def test_spectral_coclustering(): def test_spectral_biclustering(): - """Test Kluger methods on a checkerboard dataset.""" + # Test Kluger methods on a checkerboard dataset. S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5, random_state=0) diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 3f51769482df3..41d915b74fc9d 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -22,7 +22,7 @@ def test_n_samples_leaves_roots(): - """Sanity check for the number of samples in leaves and roots""" + # Sanity check for the number of samples in leaves and roots X, y = make_blobs(n_samples=10) brc = Birch() brc.fit(X) @@ -34,7 +34,7 @@ def test_n_samples_leaves_roots(): def test_partial_fit(): - """Test that fit is equivalent to calling partial_fit multiple times""" + # Test that fit is equivalent to calling partial_fit multiple times X, y = make_blobs(n_samples=100) brc = Birch(n_clusters=3) brc.fit(X) @@ -52,7 +52,7 @@ def test_partial_fit(): def test_birch_predict(): - """Test the predict method predicts the nearest centroid.""" + # Test the predict method predicts the nearest centroid. 
rng = np.random.RandomState(0) X = generate_clustered_data(n_clusters=3, n_features=3, n_samples_per_cluster=10) @@ -70,7 +70,7 @@ def test_birch_predict(): def test_n_clusters(): - """Test that n_clusters param works properly""" + # Test that n_clusters param works properly X, y = make_blobs(n_samples=100, centers=10) brc1 = Birch(n_clusters=10) brc1.fit(X) @@ -96,7 +96,7 @@ def test_n_clusters(): def test_sparse_X(): - """Test that sparse and dense data give same results""" + # Test that sparse and dense data give the same results X, y = make_blobs(n_samples=100, centers=10) brc = Birch(n_clusters=10) brc.fit(X) @@ -119,7 +119,7 @@ def check_branching_factor(node, branching_factor): def test_branching_factor(): - """Test that nodes have at max branching_factor number of subclusters""" + # Test that nodes have at most branching_factor subclusters X, y = make_blobs() branching_factor = 9 @@ -149,7 +149,7 @@ def check_threshold(birch_instance, threshold): def test_threshold(): - """Test that the leaf subclusters have a threshold lesser than radius""" + # Test that the leaf subclusters have a threshold less than the radius X, y = make_blobs(n_samples=80, centers=4) brc = Birch(threshold=0.5, n_clusters=None) brc.fit(X) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index be53b85a0e701..dc5dce4d8edee 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -25,7 +25,7 @@ def test_dbscan_similarity(): - """Tests the DBSCAN algorithm with a similarity array.""" + # Tests the DBSCAN algorithm with a similarity array. # Parameters chosen specifically for this task. eps = 0.15 min_samples = 10 @@ -48,7 +48,7 @@ def test_dbscan_similarity(): def test_dbscan_feature(): - """Tests the DBSCAN algorithm with a feature vector array.""" + # Tests the DBSCAN algorithm with a feature vector array. # Parameters chosen specifically for this task. # Different eps to other test, because distance is not normalised. eps = 0.8 @@ -91,7 +91,7 @@ def test_dbscan_no_core_samples(): def test_dbscan_callable(): - """Tests the DBSCAN algorithm with a callable metric.""" + # Tests the DBSCAN algorithm with a callable metric. # Parameters chosen specifically for this task. # Different eps to other test, because distance is not normalised. eps = 0.8 @@ -117,7 +117,7 @@ def test_dbscan_callable(): def test_dbscan_balltree(): - """Tests the DBSCAN algorithm with balltree for neighbor calculation.""" + # Tests the DBSCAN algorithm with balltree for neighbor calculation. eps = 0.8 min_samples = 10 @@ -156,13 +156,13 @@ def test_dbscan_balltree(): def test_input_validation(): - """DBSCAN.fit should accept a list of lists.""" + # DBSCAN.fit should accept a list of lists. X = [[1., 2.], [3., 4.]] DBSCAN().fit(X) # must not raise exception def test_dbscan_badargs(): - """Test bad argument values: these should all raise ValueErrors""" + # Test bad argument values: these should all raise ValueErrors assert_raises(ValueError, dbscan, X, eps=-1.0) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 8733fd6e05ada..8cfba9822e764 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -58,9 +58,7 @@ def test_linkage_misc(): def test_structured_linkage_tree(): - """ - Check that we obtain the correct solution for structured linkage trees. - """ + # Check that we obtain the correct solution for structured linkage trees. 
rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) # Avoiding a mask with only 'True' entries @@ -82,9 +80,7 @@ def test_structured_linkage_tree(): def test_unstructured_linkage_tree(): - """ - Check that we obtain the correct solution for unstructured linkage trees. - """ + # Check that we obtain the correct solution for unstructured linkage trees. rng = np.random.RandomState(0) X = rng.randn(50, 100) for this_X in (X, X[0]): @@ -107,9 +103,7 @@ def test_unstructured_linkage_tree(): def test_height_linkage_tree(): - """ - Check that the height of the results of linkage tree is sorted. - """ + # Check that the heights of the linkage tree results are sorted. rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) X = rng.randn(50, 100) @@ -121,10 +115,8 @@ def test_height_linkage_tree(): def test_agglomerative_clustering(): - """ - Check that we obtain the correct number of clusters with - agglomerative clustering. - """ + # Check that we obtain the correct number of clusters with + # agglomerative clustering. rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) n_samples = 100 @@ -211,9 +203,7 @@ def test_agglomerative_clustering(): def test_ward_agglomeration(): - """ - Check that we obtain the correct solution in a simplistic case - """ + # Check that we obtain the correct solution in a simplistic case rng = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) X = rng.randn(50, 100) @@ -245,8 +235,7 @@ def assess_same_labelling(cut1, cut2): def test_scikit_vs_scipy(): - """Test scikit linkage with full connectivity (i.e. unstructured) vs scipy - """ + # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy n, p, k = 10, 5, 3 rng = np.random.RandomState(0) @@ -273,10 +262,8 @@ def test_scikit_vs_scipy(): def test_connectivity_propagation(): - """ - Check that connectivity in the ward tree is propagated correctly during - merging. - """ + # Check that connectivity in the ward tree is propagated correctly during + # merging. X = np.array([(.014, .120), (.014, .099), (.014, .097), (.017, .153), (.017, .153), (.018, .153), (.018, .153), (.018, .153), (.018, .153), @@ -291,10 +278,8 @@ def test_connectivity_propagation(): def test_ward_tree_children_order(): - """ - Check that children are ordered in the same way for both structured and - unstructured versions of ward_tree. - """ + # Check that children are ordered in the same way for both structured and + # unstructured versions of ward_tree. # test on five random datasets n, p = 10, 5 @@ -313,7 +298,7 @@ def test_ward_tree_children_order(): def test_ward_linkage_tree_return_distance(): - """Test return_distance option on linkage and ward trees""" + # Test return_distance option on linkage and ward trees # test that return_distance when set true, gives same # output on both structured and unstructured clustering. @@ -420,10 +405,8 @@ def test_ward_linkage_tree_return_distance(): def test_connectivity_fixing_non_lil(): - """ - Check non regression of a bug if a non item assignable connectivity is - provided with more than one component. - """ + # Check non-regression of a bug when a connectivity matrix that does not + # support item assignment is provided with more than one component. 
# create dummy data x = np.array([[0, 0], [1, 1]]) # create a mask with several components to force connectivity fixing @@ -475,7 +458,7 @@ def test_connectivity_ignores_diagonal(): def test_compute_full_tree(): - """Test that the full tree is computed if n_clusters is small""" + # Test that the full tree is computed if n_clusters is small rng = np.random.RandomState(0) X = rng.randn(10, 2) connectivity = kneighbors_graph(X, 5, include_self=False) @@ -502,7 +485,7 @@ def test_compute_full_tree(): def test_n_components(): - """Test n_components returned by linkage, average and ward tree""" + # Test n_components returned by linkage, average and ward tree rng = np.random.RandomState(0) X = rng.rand(5, 5) diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 75015bd678e1f..f52280c13d9cc 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -81,7 +81,7 @@ def test_labels_assignment_and_inertia(): def test_minibatch_update_consistency(): - """Check that dense and sparse minibatch update give the same results""" + # Check that dense and sparse minibatch update give the same results rng = np.random.RandomState(42) old_centers = centers + rng.normal(size=centers.shape) @@ -480,7 +480,7 @@ def test_mini_match_k_means_invalid_init(): def test_k_means_copyx(): - """Check if copy_x=False returns nearly equal X after de-centering.""" + # Check if copy_x=False returns nearly equal X after de-centering. my_X = X.copy() km = KMeans(copy_x=False, n_clusters=n_clusters, random_state=42) km.fit(my_X) @@ -491,13 +491,11 @@ def test_k_means_copyx(): def test_k_means_non_collapsed(): - """Check k_means with a bad initialization does not yield a singleton - - Starting with bad centers that are quickly ignored should not - result in a repositioning of the centers to the center of mass that - would lead to collapsed centers which in turns make the clustering - dependent of the numerical unstabilities. - """ + # Check k_means with a bad initialization does not yield a singleton + # Starting with bad centers that are quickly ignored should not + # result in a repositioning of the centers to the center of mass that + # would lead to collapsed centers, which in turn would make the clustering + # dependent on numerical instabilities. 
my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]]) array_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]]) km = KMeans(init=array_init, n_clusters=3, random_state=42, n_init=1) @@ -630,7 +628,7 @@ def test_fit_transform(): def test_n_init(): - """Check that increasing the number of init increases the quality""" + # Check that increasing the number of inits increases the quality n_runs = 5 n_init_range = [1, 5, 10] inertia = np.zeros((len(n_init_range), n_runs)) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index 736952d0f181a..9aefa20897414 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -25,13 +25,13 @@ def test_estimate_bandwidth(): - """Test estimate_bandwidth""" + # Test estimate_bandwidth bandwidth = estimate_bandwidth(X, n_samples=200) assert_true(0.9 <= bandwidth <= 1.5) def test_mean_shift(): - """ Test MeanShift algorithm """ + # Test MeanShift algorithm bandwidth = 1.2 ms = MeanShift(bandwidth=bandwidth) @@ -47,7 +47,7 @@ def test_mean_shift(): def test_meanshift_predict(): - """Test MeanShift.predict""" + # Test MeanShift.predict ms = MeanShift(bandwidth=1.2) labels = ms.fit_predict(X) labels2 = ms.predict(X) @@ -62,17 +62,15 @@ def test_meanshift_all_orphans(): def test_unfitted(): - """Non-regression: before fit, there should be not fitted attributes.""" + # Non-regression: before fit, there should be no fitted attributes. ms = MeanShift() assert_false(hasattr(ms, "cluster_centers_")) assert_false(hasattr(ms, "labels_")) def test_bin_seeds(): - """ - Test the bin seeding technique which can be used in the mean shift - algorithm - """ + # Test the bin seeding technique which can be used in the mean shift + # algorithm # Data is just 6 points in the plane X = np.array([[1., 1.], [1.4, 1.4], [1.8, 1.2], [2., 1.], [2.1, 1.1], [0., 0.]]) diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 412f0a0211353..c3810814bc17e 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -162,7 +162,7 @@ def test_affinities(): assert_equal((X.shape[0],), labels.shape) def histogram(x, y, **kwargs): - """Histogram kernel implemented as a callable.""" + # Histogram kernel implemented as a callable. assert_equal(kwargs, {}) # no kernel_params that we didn't ask for return np.minimum(x, y).sum() diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index e2888fe99228e..188e4930e9175 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -23,9 +23,7 @@ def test_covariance(): - """Tests Covariance module on a simple dataset. - - """ + # Tests Covariance module on a simple dataset. # test covariance fit from data cov = EmpiricalCovariance() cov.fit(X) @@ -76,9 +74,7 @@ def test_covariance(): def test_shrunk_covariance(): - """Tests ShrunkCovariance module on a simple dataset. - - """ + # Tests ShrunkCovariance module on a simple dataset. # compare shrunk covariance obtained from data and from MLE estimate cov = ShrunkCovariance(shrinkage=0.5) cov.fit(X) @@ -110,9 +106,7 @@ def test_shrunk_covariance(): def test_ledoit_wolf(): - """Tests LedoitWolf module on a simple dataset. - - """ + # Tests LedoitWolf module on a simple dataset. 
# test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) lw = LedoitWolf(assume_centered=True) @@ -197,9 +191,7 @@ def test_ledoit_wolf(): def test_oas(): - """Tests OAS module on a simple dataset. - - """ + # Tests OAS module on a simple dataset. # test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) oa = OAS(assume_centered=True) @@ -231,7 +223,7 @@ def test_oas(): assert_almost_equal(oa.score(X_centered), score_, 4) assert(oa.precision_ is None) - ### Same tests without assuming centered data + # Same tests without assuming centered data # test shrinkage coeff on a simple data set oa = OAS() oa.fit(X) diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index d08c6291a9695..63e92fb6af803 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -21,10 +21,8 @@ def test_mcd(): - """Tests the FastMCD algorithm implementation - - """ - ### Small data set + # Tests the FastMCD algorithm implementation + # Small data set # test without outliers (random independent normal data) launch_mcd_on_dataset(100, 5, 0, 0.01, 0.1, 80) # test with a contaminated data set (medium contamination) @@ -32,13 +30,13 @@ def test_mcd(): # test with a contaminated data set (strong contamination) launch_mcd_on_dataset(100, 5, 40, 0.1, 0.1, 50) - ### Medium data set + # Medium data set launch_mcd_on_dataset(1000, 5, 450, 0.1, 0.1, 540) - ### Large data set + # Large data set launch_mcd_on_dataset(1700, 5, 800, 0.1, 0.1, 870) - ### 1D data set + # 1D data set launch_mcd_on_dataset(500, 1, 100, 0.001, 0.001, 350) diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 9efd4732206f9..84748bf8d2cc9 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -236,7 +236,7 @@ def test_PLSSVD(): def test_univariate_pls_regression(): - """Ensure 1d Y is correctly interpreted""" + # Ensure 1d Y is correctly interpreted d = load_linnerud() X = d.data Y = d.target diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index f1c7e2dab1090..129e3c2609ef9 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -16,8 +16,7 @@ def test_factor_analysis(): - """Test FactorAnalysis ability to recover the data covariance structure - """ + # Test FactorAnalysis ability to recover the data covariance structure rng = np.random.RandomState(0) n_samples, n_features, n_components = 20, 5, 3 diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 0bd7c83d1b5fc..66963701d15bd 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -38,9 +38,7 @@ def center_and_norm(x, axis=-1): def test_gs(): - """ - Test gram schmidt orthonormalization - """ + # Test Gram-Schmidt orthonormalization # generate a random orthogonal matrix rng = np.random.RandomState(0) W, _, _ = np.linalg.svd(rng.randn(10, 10)) @@ -54,8 +52,7 @@ def test_gs(): def test_fastica_simple(add_noise=False): - """ Test the FastICA algorithm on very simple data. - """ + # Test the FastICA algorithm on very simple data. 
rng = np.random.RandomState(0) # scipy.stats uses the global RNG: np.random.seed(0) @@ -146,8 +143,7 @@ def test_fastica_nowhiten(): def test_non_square_fastica(add_noise=False): - """ Test the FastICA algorithm on very simple data. - """ + # Test the FastICA algorithm on very simple data. rng = np.random.RandomState(0) n_samples = 1000 @@ -190,7 +186,7 @@ def test_non_square_fastica(add_noise=False): def test_fit_transform(): - """Test FastICA.fit_transform""" + # Test FastICA.fit_transform rng = np.random.RandomState(0) X = rng.random_sample((100, 10)) for whiten, n_components in [[True, 5], [False, None]]: @@ -211,7 +207,7 @@ def test_fit_transform(): def test_inverse_transform(): - """Test FastICA.inverse_transform""" + # Test FastICA.inverse_transform n_features = 10 n_samples = 100 n1, n2 = 5, 10 diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index db75dfe7f9960..ae453d5f85f1c 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -12,7 +12,7 @@ def test_incremental_pca(): - """Incremental PCA on dense arrays.""" + # Incremental PCA on dense arrays. X = iris.data batch_size = X.shape[0] // 3 ipca = IncrementalPCA(n_components=2, batch_size=batch_size) @@ -35,7 +35,7 @@ def test_incremental_pca(): def test_incremental_pca_check_projection(): - """Test that the projection of data is correct.""" + # Test that the projection of data is correct. rng = np.random.RandomState(1999) n, p = 100, 3 X = rng.randn(n, p) * .1 @@ -56,7 +56,7 @@ def test_incremental_pca_check_projection(): def test_incremental_pca_inverse(): - """Test that the projection of data can be inverted.""" + # Test that the projection of data can be inverted. rng = np.random.RandomState(1999) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -72,7 +72,7 @@ def test_incremental_pca_inverse(): def test_incremental_pca_validation(): - """Test that n_components is >=1 and <= n_features.""" + # Test that n_components is >=1 and <= n_features. X = [[0, 1], [1, 0]] for n_components in [-1, 0, .99, 3]: assert_raises(ValueError, IncrementalPCA(n_components, @@ -80,7 +80,7 @@ def test_incremental_pca_validation(): def test_incremental_pca_set_params(): - """Test that components_ sign is stable over batch sizes.""" + # Test that changing n_components with set_params raises an error. rng = np.random.RandomState(1999) n_samples = 100 n_features = 20 @@ -101,7 +101,7 @@ def test_incremental_pca_set_params(): def test_incremental_pca_num_features_change(): - """Test that changing n_components will raise an error.""" + # Test that changing n_features will raise an error. rng = np.random.RandomState(1999) n_samples = 100 X = rng.randn(n_samples, 20) @@ -112,7 +112,7 @@ def test_incremental_pca_num_features_change(): def test_incremental_pca_batch_signs(): - """Test that components_ sign is stable over batch sizes.""" + # Test that components_ sign is stable over batch sizes. rng = np.random.RandomState(1999) n_samples = 100 n_features = 3 @@ -128,7 +128,7 @@ def test_incremental_pca_batch_signs(): def test_incremental_pca_batch_values(): - """Test that components_ values are stable over batch sizes.""" + # Test that components_ values are stable over batch sizes. 
rng = np.random.RandomState(1999) n_samples = 100 n_features = 3 @@ -144,7 +144,7 @@ def test_incremental_pca_batch_values(): def test_incremental_pca_partial_fit(): - """Test that fit and partial_fit get equivalent results.""" + # Test that fit and partial_fit get equivalent results. rng = np.random.RandomState(1999) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -164,7 +164,7 @@ def test_incremental_pca_partial_fit(): def test_incremental_pca_against_pca_iris(): - """Test that IncrementalPCA and PCA are approximate (to a sign flip).""" + # Test that IncrementalPCA and PCA are approximate (to a sign flip). X = iris.data Y_pca = PCA(n_components=2).fit_transform(X) @@ -174,7 +174,7 @@ def test_incremental_pca_against_pca_iris(): def test_incremental_pca_against_pca_random_data(): - """Test that IncrementalPCA and PCA are approximate (to a sign flip).""" + # Test that IncrementalPCA and PCA are approximate (to a sign flip). rng = np.random.RandomState(1999) n_samples = 100 n_features = 3 @@ -187,7 +187,7 @@ def test_incremental_pca_against_pca_random_data(): def test_explained_variances(): - """Test that PCA and IncrementalPCA calculations match""" + # Test that PCA and IncrementalPCA calculations match X = datasets.make_low_rank_matrix(1000, 100, tail_strength=0., effective_rank=10, random_state=1999) prec = 3 @@ -204,7 +204,7 @@ def test_explained_variances(): def test_whitening(): - """Test that PCA and IncrementalPCA transforms match to sign flip.""" + # Test that PCA and IncrementalPCA transforms match to sign flip. X = datasets.make_low_rank_matrix(1000, 10, tail_strength=0., effective_rank=2, random_state=1999) prec = 3 diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index a86c33a9e9937..9cd9adbc2bb1d 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -19,7 +19,7 @@ def test_kernel_pca(): X_pred = rng.random_sample((2, 4)) def histogram(x, y, **kwargs): - """Histogram kernel implemented as a callable.""" + # Histogram kernel implemented as a callable. 
assert_equal(kwargs, {}) # no kernel_params that we didn't ask for return np.minimum(x, y).sum() @@ -78,8 +78,8 @@ def test_kernel_pca_sparse(): X_fit_transformed.shape[1]) # inverse transform - #X_pred2 = kpca.inverse_transform(X_pred_transformed) - #assert_equal(X_pred2.shape, X_pred.shape) + # X_pred2 = kpca.inverse_transform(X_pred_transformed) + # assert_equal(X_pred2.shape, X_pred.shape) def test_kernel_pca_linear_kernel(): @@ -187,7 +187,7 @@ def test_gridsearch_pipeline_precomputed(): def test_nested_circles(): - """Test the linear separability of the first 2D KPCA transform""" + # Test the linear separability of the first 2D KPCA transform X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0) diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 8476e7e313007..4793935bdce45 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -15,12 +15,12 @@ @raises(ValueError) def test_initialize_nn_input(): - """Test NNDSVD behaviour on negative input""" + # Test NNDSVD behaviour on negative input nmf._initialize_nmf(-np.ones((2, 2)), 2) def test_initialize_nn_output(): - """Test that NNDSVD does not return negative values""" + # Test that NNDSVD does not return negative values data = np.abs(random_state.randn(10, 10)) for var in (None, 'a', 'ar'): W, H = nmf._initialize_nmf(data, 10, random_state=0) @@ -28,11 +28,9 @@ def test_initialize_nn_output(): def test_initialize_close(): - """Test NNDSVD error - - Test that _initialize_nmf error is less than the standard deviation of the - entries in the matrix. - """ + # Test NNDSVD error + # Test that _initialize_nmf error is less than the standard deviation of + # the entries in the matrix. A = np.abs(random_state.randn(10, 10)) W, H = nmf._initialize_nmf(A, 10) error = linalg.norm(np.dot(W, H) - A) @@ -41,11 +39,9 @@ def test_initialize_close(): def test_initialize_variants(): - """Test NNDSVD variants correctness - - Test that the variants 'a' and 'ar' differ from basic NNDSVD only where - the basic version has zeros. - """ + # Test NNDSVD variants correctness + # Test that the variants 'a' and 'ar' differ from basic NNDSVD only where + # the basic version has zeros. 
data = np.abs(random_state.randn(10, 10)) W0, H0 = nmf._initialize_nmf(data, 10, variant=None) Wa, Ha = nmf._initialize_nmf(data, 10, variant='a') @@ -57,14 +53,14 @@ def test_initialize_variants(): @raises(ValueError) def test_projgrad_nmf_fit_nn_input(): - """Test model fit behaviour on negative input""" + # Test model fit behaviour on negative input A = -np.ones((2, 2)) m = nmf.ProjectedGradientNMF(n_components=2, init=None, random_state=0) m.fit(A) def test_projgrad_nmf_fit_nn_output(): - """Test that the decomposition does not contain negative values""" + # Test that the decomposition does not contain negative values A = np.c_[5 * np.ones(5) - np.arange(1, 6), 5 * np.ones(5) + np.arange(1, 6)] for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar'): @@ -76,21 +72,21 @@ def test_projgrad_nmf_fit_nn_output(): def test_projgrad_nmf_fit_close(): - """Test that the fit is not too far away""" + # Test that the fit is not too far away pnmf = nmf.ProjectedGradientNMF(5, init='nndsvda', random_state=0) X = np.abs(random_state.randn(6, 5)) assert_less(pnmf.fit(X).reconstruction_err_, 0.05) def test_nls_nn_output(): - """Test that NLS solver doesn't return negative values""" + # Test that NLS solver doesn't return negative values A = np.arange(1, 5).reshape(1, -1) Ap, _, _ = nmf._nls_subproblem(np.dot(A.T, -A), A.T, A, 0.001, 100) assert_false((Ap < 0).any()) def test_nls_close(): - """Test that the NLS results should be close""" + # Test that the NLS results should be close A = np.arange(1, 5).reshape(1, -1) Ap, _, _ = nmf._nls_subproblem(np.dot(A.T, A), A.T, np.zeros_like(A), 0.001, 100) @@ -98,10 +94,8 @@ def test_nls_close(): def test_projgrad_nmf_transform(): - """Test that NMF.transform returns close values - - (transform uses scipy.optimize.nnls for now) - """ + # Test that NMF.transform returns close values + # (transform uses scipy.optimize.nnls for now) A = np.abs(random_state.randn(6, 5)) m = nmf.ProjectedGradientNMF(n_components=5, init='nndsvd', random_state=0) transf = m.fit_transform(A) @@ -109,18 +103,16 @@ def test_projgrad_nmf_transform(): def test_n_components_greater_n_features(): - """Smoke test for the case of more components than features.""" + # Smoke test for the case of more components than features. A = np.abs(random_state.randn(30, 10)) nmf.ProjectedGradientNMF(n_components=15, sparseness='data', random_state=0).fit(A) def test_projgrad_nmf_sparseness(): - """Test sparseness - - Test that sparsity constraints actually increase sparseness in the - part where they are applied. - """ + # Test sparseness + # Test that sparsity constraints actually increase sparseness in the + # part where they are applied. A = np.abs(random_state.randn(10, 10)) m = nmf.ProjectedGradientNMF(n_components=5, random_state=0).fit(A) data_sp = nmf.ProjectedGradientNMF(n_components=5, sparseness='data', @@ -132,7 +124,7 @@ def test_projgrad_nmf_sparseness(): def test_sparse_input(): - """Test that sparse matrices are accepted as input""" + # Test that sparse matrices are accepted as input from scipy.sparse import csc_matrix A = np.abs(random_state.randn(10, 10)) @@ -160,7 +152,7 @@ def test_sparse_input(): def test_sparse_transform(): - """Test that transform works on sparse data. Issue #2124""" + # Test that transform works on sparse data. 
Issue #2124 from scipy.sparse import csc_matrix A = np.abs(random_state.randn(5, 4)) diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index b31ca4d680404..34fe795d2b198 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -17,7 +17,7 @@ def test_pca(): - """PCA on dense arrays""" + # PCA on dense arrays pca = PCA(n_components=2) X = iris.data X_r = pca.fit(X).transform(X) @@ -47,7 +47,7 @@ def test_pca(): def test_whitening(): - """Check that PCA output has unit-variance""" + # Check that PCA output has unit-variance rng = np.random.RandomState(0) n_samples = 100 n_features = 80 @@ -93,7 +93,7 @@ def test_whitening(): def test_explained_variance(): - """Check that PCA output has unit-variance""" + # Check that the explained variances are computed correctly rng = np.random.RandomState(0) n_samples = 100 n_features = 80 @@ -118,7 +118,7 @@ def test_explained_variance(): def test_pca_check_projection(): - """Test that the projection of data is correct""" + # Test that the projection of data is correct rng = np.random.RandomState(0) n, p = 100, 3 X = rng.randn(n, p) * .1 @@ -132,7 +132,7 @@ def test_pca_check_projection(): def test_pca_inverse(): - """Test that the projection of data can be inverted""" + # Test that the projection of data can be inverted rng = np.random.RandomState(0) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -161,7 +161,7 @@ def test_pca_validation(): def test_randomized_pca_check_projection(): - """Test that the projection by RandomizedPCA on dense data is correct""" + # Test that the projection by RandomizedPCA on dense data is correct rng = np.random.RandomState(0) n, p = 100, 3 X = rng.randn(n, p) * .1 @@ -175,7 +175,7 @@ def test_randomized_pca_check_projection(): def test_randomized_pca_check_list(): - """Test that the projection by RandomizedPCA on list data is correct""" + # Test that the projection by RandomizedPCA on list data is correct X = [[1.0, 0.0], [0.0, 1.0]] X_transformed = RandomizedPCA(n_components=1, random_state=0).fit(X).transform(X) @@ -185,7 +185,7 @@ def test_randomized_pca_check_list(): def test_randomized_pca_inverse(): - """Test that RandomizedPCA is inversible on dense data""" + # Test that RandomizedPCA is invertible on dense data rng = np.random.RandomState(0) n, p = 50, 3 X = rng.randn(n, p) # spherical data @@ -209,7 +209,7 @@ def test_randomized_pca_inverse(): def test_pca_dim(): - """Check automated dimensionality setting""" + # Check automated dimensionality setting rng = np.random.RandomState(0) n, p = 100, 5 X = rng.randn(n, p) * .1 @@ -220,10 +220,8 @@ def test_pca_dim(): def test_infer_dim_1(): - """TODO: explain what this is testing - - Or at least use explicit variable names... - """ + # TODO: explain what this is testing + # Or at least use explicit variable names... n, p = 1000, 5 rng = np.random.RandomState(0) X = (rng.randn(n, p) * .1 + rng.randn(n, 1) * np.array([3, 4, 5, 1, 2]) @@ -239,10 +237,8 @@ def test_infer_dim_1(): def test_infer_dim_2(): - """TODO: explain what this is testing - - Or at least use explicit variable names... - """ + # TODO: explain what this is testing + # Or at least use explicit variable names... 
n, p = 1000, 5 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 @@ -255,8 +251,6 @@ def test_infer_dim_2(): def test_infer_dim_3(): - """ - """ n, p = 100, 5 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 @@ -290,7 +284,7 @@ def test_infer_dim_by_explained_variance(): def test_pca_score(): - """Test that probabilistic PCA scoring yields a reasonable score""" + # Test that probabilistic PCA scoring yields a reasonable score n, p = 1000, 3 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) @@ -302,7 +296,7 @@ def test_pca_score(): def test_pca_score2(): - """Test that probabilistic PCA correctly separated different datasets""" + # Test that probabilistic PCA correctly separates different datasets n, p = 100, 3 rng = np.random.RandomState(0) X = rng.randn(n, p) * .1 + np.array([3, 4, 5]) @@ -320,7 +314,7 @@ def test_pca_score2(): def test_pca_score3(): - """Check that probabilistic PCA selects the right model""" + # Check that probabilistic PCA selects the right model n, p = 200, 3 rng = np.random.RandomState(0) Xl = (rng.randn(n, p) + rng.randn(n, 1) * np.array([3, 4, 5]) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 96cee5a690331..2e2eba08b7696 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -89,10 +89,8 @@ def test_fit_transform_parallel(): def test_transform_nan(): - """ - Test that SparsePCA won't return NaN when there is 0 feature in all - samples. - """ + # Test that SparsePCA won't return NaN when a feature is 0 in all + # samples. rng = np.random.RandomState(0) Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array Y[:, 0] = 0 diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index bc82ac2f9e2be..36eb50a78ce33 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -50,7 +50,7 @@ def test_classification(): - """Check classification for various parameter settings.""" + # Check classification for various parameter settings. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, @@ -73,7 +73,7 @@ def test_classification(): def test_sparse_classification(): - """Check classification for various parameter settings on sparse input.""" + # Check classification for various parameter settings on sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set""" @@ -132,7 +132,7 @@ def fit(self, X, y): def test_regression(): - """Check regression for various parameter settings.""" + # Check regression for various parameter settings. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], boston.target[:50], @@ -154,7 +154,7 @@ def test_regression(): def test_sparse_regression(): - """Check regression for various parameter settings on sparse input.""" + # Check regression for various parameter settings on sparse input. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], boston.target[:50], @@ -214,7 +214,7 @@ def fit(self, X, y): def test_bootstrap_samples(): - """Test that bootstraping samples generate non-perfect base estimators.""" + # Test that bootstrapping samples generates non-perfect base estimators. 
rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -242,7 +242,7 @@ def test_bootstrap_samples(): def test_bootstrap_features(): - """Test that bootstraping features may generate dupplicate features.""" + # Test that bootstrapping features may generate duplicate features. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -266,7 +266,7 @@ def test_bootstrap_features(): def test_probability(): - """Predict probabilities.""" + # Predict probabilities. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, @@ -298,8 +298,8 @@ def test_probability(): def test_oob_score_classification(): - """Check that oob prediction is a good estimation of the generalization - error.""" + # Check that oob prediction is a good estimation of the generalization + # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, @@ -328,8 +328,8 @@ def test_oob_score_classification(): def test_oob_score_regression(): - """Check that oob prediction is a good estimation of the generalization - error.""" + # Check that oob prediction is a good estimation of the generalization + # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -357,7 +357,7 @@ def test_oob_score_regression(): def test_single_estimator(): - """Check singleton ensembles.""" + # Check singleton ensembles. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, @@ -375,7 +375,7 @@ def test_single_estimator(): def test_error(): - """Test that it gives proper exception on deficient input.""" + # Test that it gives a proper exception on deficient input. X, y = iris.data, iris.target base = DecisionTreeClassifier() @@ -408,7 +408,7 @@ def test_error(): def test_parallel_classification(): - """Check parallel classification.""" + # Check parallel classification. rng = check_random_state(0) # Classification @@ -454,7 +454,7 @@ def test_parallel_classification(): def test_parallel_regression(): - """Check parallel regression.""" + # Check parallel regression. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, @@ -480,7 +480,7 @@ def test_parallel_regression(): def test_gridsearch(): - """Check that bagging ensembles can be grid-searched.""" + # Check that bagging ensembles can be grid-searched. # Transform iris into a binary classification task X, y = iris.data, iris.target y[y == 2] = 1 @@ -495,7 +495,7 @@ def test_gridsearch(): def test_base_estimator(): - """Check base_estimator and its default values.""" + # Check base_estimator and its default values. rng = check_random_state(0) # Classification diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 9a1765819b1aa..0268715cde9ef 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -15,7 +15,7 @@ def test_base(): - """Check BaseEnsemble methods.""" + # Check BaseEnsemble methods. ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=3) iris = load_iris() @@ -34,8 +34,8 @@ def test_base(): def test_base_zero_n_estimators(): - """Check that instantiating a BaseEnsemble with n_estimators<=0 raises - a ValueError.""" + # Check that instantiating a BaseEnsemble with n_estimators<=0 raises + # a ValueError. 
ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=0) iris = load_iris() assert_raise_message(ValueError, diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index cd0697af20500..33aa5cb3e4050 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -105,7 +105,7 @@ def test_classification_toy(): def check_iris_criterion(name, criterion): - """Check consistency on dataset iris.""" + # Check consistency on dataset iris. ForestClassifier = FOREST_CLASSIFIERS[name] clf = ForestClassifier(n_estimators=10, criterion=criterion, @@ -129,7 +129,7 @@ def test_iris(): def check_boston_criterion(name, criterion): - """Check consistency on dataset boston house prices.""" + # Check consistency on dataset boston house prices. ForestRegressor = FOREST_REGRESSORS[name] clf = ForestRegressor(n_estimators=5, criterion=criterion, random_state=1) @@ -152,7 +152,7 @@ def test_boston(): def check_regressor_attributes(name): - """Regression models should not have a classes_ attribute.""" + # Regression models should not have a classes_ attribute. r = FOREST_REGRESSORS[name](random_state=0) assert_false(hasattr(r, "classes_")) assert_false(hasattr(r, "n_classes_")) @@ -168,7 +168,7 @@ def test_regressor_attributes(): def check_probability(name): - """Predict probabilities.""" + # Predict probabilities. ForestClassifier = FOREST_CLASSIFIERS[name] with np.errstate(divide="ignore"): clf = ForestClassifier(n_estimators=10, random_state=1, max_features=1, @@ -186,7 +186,7 @@ def test_probability(): def check_importances(name, X, y): - """Check variable importances.""" + # Check variable importances. ForestClassifier = FOREST_CLASSIFIERS[name] for n_jobs in [1, 2]: @@ -236,8 +236,8 @@ def test_unfitted_feature_importances(): def check_oob_score(name, X, y, n_estimators=20): - """Check that oob prediction is a good estimation of the generalization - error.""" + # Check that oob prediction is a good estimation of the generalization + # error. # Proper behavior est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=n_estimators, bootstrap=True) @@ -304,7 +304,7 @@ def check_gridsearch(name): def test_gridsearch(): - """Check that base trees can be grid-searched.""" + # Check that base trees can be grid-searched. for name in FOREST_CLASSIFIERS: yield check_gridsearch, name @@ -333,7 +333,7 @@ def test_parallel(): def check_pickle(name, X, y): - """Check pickability.""" + # Check picklability. ForestEstimator = FOREST_ESTIMATORS[name] obj = ForestEstimator(random_state=0) @@ -356,7 +356,7 @@ def test_pickle(): def check_multioutput(name): - """Check estimators on multi-output problems.""" + # Check estimators on multi-output problems. X_train = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-2, 1], [-1, 1], [-1, 2], [2, -1], [1, -1], [1, -2]] @@ -391,7 +391,7 @@ def test_multioutput(): def check_classes_shape(name): - """Test that n_classes_ and classes_ have proper shape.""" + # Test that n_classes_ and classes_ have proper shape. ForestClassifier = FOREST_CLASSIFIERS[name] # Classification, single output @@ -554,7 +554,7 @@ def test_distribution(): def check_max_leaf_nodes_max_depth(name, X, y): - """Test precedence of max_leaf_nodes over max_depth. """ + # Test precedence of max_leaf_nodes over max_depth. 
ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(max_depth=1, max_leaf_nodes=4, n_estimators=1).fit(X, y) @@ -571,7 +571,7 @@ def test_max_leaf_nodes_max_depth(): def check_min_samples_leaf(name, X, y): - """Test if leaves contain more than leaf_count training examples""" + # Test if leaves contain more than leaf_count training examples ForestEstimator = FOREST_ESTIMATORS[name] # test both DepthFirstTreeBuilder and BestFirstTreeBuilder @@ -597,8 +597,8 @@ def test_min_samples_leaf(): def check_min_weight_fraction_leaf(name, X, y): - """Test if leaves contain at least min_weight_fraction_leaf of the - training set""" + # Test if leaves contain at least min_weight_fraction_leaf of the + # training set ForestEstimator = FOREST_ESTIMATORS[name] rng = np.random.RandomState(0) weights = rng.rand(X.shape[0]) @@ -671,7 +671,7 @@ def test_sparse_input(): def check_memory_layout(name, dtype): - """Check that it works no matter the memory layout""" + # Check that it works no matter the memory layout est = FOREST_ESTIMATORS[name](random_state=0, bootstrap=False) @@ -746,7 +746,7 @@ def test_1d_input(): def check_class_weights(name): - """Check class_weights resemble sample_weights behavior.""" + # Check class_weights resemble sample_weights behavior. ForestClassifier = FOREST_CLASSIFIERS[name] # Iris is balanced, so no effect expected for using 'auto' weights @@ -794,7 +794,7 @@ def test_class_weights(): def check_class_weight_auto_and_bootstrap_multi_output(name): - """Test class_weight works for multi-output""" + # Test class_weight works for multi-output ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T clf = ForestClassifier(class_weight='auto', random_state=0) @@ -812,7 +812,7 @@ def test_class_weight_auto_and_bootstrap_multi_output(): def check_class_weight_errors(name): - """Test if class_weight raises errors and warnings when expected.""" + # Test if class_weight raises errors and warnings when expected. ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T @@ -842,8 +842,8 @@ def test_class_weight_errors(): def check_warm_start(name, random_state=42): - """Test if fitting incrementally with warm start gives a forest of the - right size and the same results as a normal fit.""" + # Test if fitting incrementally with warm start gives a forest of the + # right size and the same results as a normal fit. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf_ws = None @@ -874,8 +874,7 @@ def test_warm_start(): def check_warm_start_clear(name): - """Test if fit clears state and grows a new forest when warm_start==False. - """ + # Test if fit clears state and grows a new forest when warm_start==False. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=1, warm_start=False, @@ -897,7 +896,7 @@ def test_warm_start_clear(): def check_warm_start_smaller_n_estimators(name): - """Test if warm start second fit with smaller n_estimators raises error.""" + # Test if warm start second fit with smaller n_estimators raises error. 
X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=1, warm_start=True) @@ -912,8 +911,8 @@ def test_warm_start_smaller_n_estimators(): def check_warm_start_equal_n_estimators(name): - """Test if warm start with equal n_estimators does nothing and returns the - same forest and raises a warning.""" + # Test if warm start with equal n_estimators does nothing and returns the + # same forest and raises a warning. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, @@ -938,7 +937,7 @@ def test_warm_start_equal_n_estimators(): def check_warm_start_oob(name): - """Test that the warm start computes oob score when asked.""" + # Test that the warm start computes oob score when asked. X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1) ForestEstimator = FOREST_ESTIMATORS[name] # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning. diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index c1133dc4c0d54..06aa68aaf4db4 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -46,7 +46,7 @@ def test_classification_toy(): - """Check classification on a toy dataset.""" + # Check classification on a toy dataset. for loss in ('deviance', 'exponential'): clf = GradientBoostingClassifier(loss=loss, n_estimators=10, @@ -64,7 +64,7 @@ def test_classification_toy(): def test_parameter_checks(): - """Check input parameter validation.""" + # Check input parameter validation. assert_raises(ValueError, GradientBoostingClassifier(n_estimators=0).fit, X, y) @@ -138,8 +138,8 @@ def test_loss_function(): def test_classification_synthetic(): - """Test GradientBoostingClassifier on synthetic dataset used by - Hastie et al. in ESLII Example 12.7. """ + # Test GradientBoostingClassifier on synthetic dataset used by + # Hastie et al. in ESLII Example 12.7. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] @@ -166,8 +166,8 @@ def test_classification_synthetic(): def test_boston(): - """Check consistency on dataset boston house prices with least squares - and least absolute deviation. """ + # Check consistency on dataset boston house prices with least squares + # and least absolute deviation. for loss in ("ls", "lad", "huber"): for subsample in (1.0, 0.5): last_y_pred = None @@ -196,7 +196,7 @@ def test_boston(): def test_iris(): - """Check consistency on dataset iris.""" + # Check consistency on dataset iris. for subsample in (1.0, 0.5): for sample_weight in (None, np.ones(len(iris.target))): clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', @@ -208,8 +208,8 @@ def test_iris(): def test_regression_synthetic(): - """Test on synthetic regression datasets used in Leo Breiman, - `Bagging Predictors?. Machine Learning 24(2): 123-140 (1996). """ + # Test on synthetic regression datasets used in Leo Breiman, + # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996). random_state = check_random_state(1) regression_params = {'n_estimators': 100, 'max_depth': 4, 'min_samples_split': 1, 'learning_rate': 0.1, @@ -266,7 +266,7 @@ def test_feature_importances(): def test_probability_log(): - """Predict probabilities.""" + # Predict probabilities. 
clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) @@ -285,7 +285,7 @@ def test_probability_log(): def test_check_inputs(): - """Test input checks (shape and type of X and y).""" + # Test input checks (shape and type of X and y). clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.fit, X, y + [0, 1]) @@ -303,7 +303,7 @@ def test_check_inputs(): def test_check_inputs_predict(): - """X has wrong shape """ + # X has wrong shape clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y) @@ -330,7 +330,7 @@ def test_check_inputs_predict(): def test_check_max_features(): - """test if max_features is valid. """ + # test if max_features is valid. clf = GradientBoostingRegressor(n_estimators=100, random_state=1, max_features=0) assert_raises(ValueError, clf.fit, X, y) @@ -344,7 +344,7 @@ def test_check_max_features(): assert_raises(ValueError, clf.fit, X, y) def test_max_feature_regression(): - """Test to make sure random state is set properly. """ + # Test to make sure random state is set properly. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] @@ -359,7 +359,7 @@ def test_max_feature_regression(): def test_max_feature_auto(): - """Test if max features is set properly for floats and str. """ + # Test if max features is set properly for floats and str. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) _, n_features = X.shape @@ -393,9 +393,8 @@ def test_max_feature_auto(): def test_staged_predict(): - """Test whether staged decision function eventually gives - the same prediction. - """ + # Test whether staged decision function eventually gives + # the same prediction. X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0) X_train, y_train = X[:200], y[:200] @@ -416,9 +415,8 @@ def test_staged_predict(): def test_staged_predict_proba(): - """Test whether staged predict proba eventually gives - the same prediction. - """ + # Test whether staged predict proba eventually gives + # the same prediction. X, y = datasets.make_hastie_10_2(n_samples=1200, random_state=1) X_train, y_train = X[:200], y[:200] @@ -463,7 +461,7 @@ def test_staged_functions_defensive(): def test_serialization(): - """Check model serialization.""" + # Check model serialization. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y) @@ -483,7 +481,7 @@ def test_serialization(): def test_degenerate_targets(): - """Check if we can fit even though all targets are equal. """ + # Check if we can fit even though all targets are equal. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) # classifier should raise exception @@ -497,7 +495,7 @@ def test_degenerate_targets(): def test_quantile_loss(): - """Check if quantile loss with alpha=0.5 equals lad. """ + # Check if quantile loss with alpha=0.5 equals lad. clf_quantile = GradientBoostingRegressor(n_estimators=100, loss='quantile', max_depth=4, alpha=0.5, random_state=7) @@ -514,7 +512,7 @@ def test_quantile_loss(): def test_symbol_labels(): - """Test with non-integer class labels. """ + # Test with non-integer class labels. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) symbol_y = tosequence(map(str, y)) @@ -525,7 +523,7 @@ def test_symbol_labels(): def test_float_class_labels(): - """Test with float class labels. """ + # Test with float class labels. 
clf = GradientBoostingClassifier(n_estimators=100, random_state=1) float_y = np.asarray(y, dtype=np.float32) @@ -537,7 +535,7 @@ def test_float_class_labels(): def test_shape_y(): - """Test with float class labels. """ + # Test with differently shaped y. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) y_ = np.asarray(y, dtype=np.int32) @@ -552,7 +550,7 @@ def test_shape_y(): def test_mem_layout(): - """Test with different memory layouts of X and y""" + # Test with different memory layouts of X and y X_ = np.asfortranarray(X) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) @@ -581,7 +579,7 @@ def test_mem_layout(): def test_oob_improvement(): - """Test if oob improvement has correct shape and regression test. """ + # Test if oob improvement has correct shape and regression test. clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5) clf.fit(X, y) @@ -593,7 +591,7 @@ def test_oob_improvement(): def test_oob_improvement_raise(): - """Test if oob improvement has correct shape. """ + # Test if oob improvement has correct shape. clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=1.0) clf.fit(X, y) @@ -601,7 +599,7 @@ def test_oob_improvement_raise(): def test_oob_multilcass_iris(): - """Check OOB improvement on multi-class dataset.""" + # Check OOB improvement on multi-class dataset. clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', random_state=1, subsample=0.5) clf.fit(iris.data, iris.target) @@ -618,7 +616,7 @@ def test_oob_multilcass_iris(): def test_verbose_output(): - """Check verbose=1 does not cause error. """ + # Check verbose=1 does not cause error. from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout @@ -643,7 +641,7 @@ def test_verbose_output(): def test_more_verbose_output(): - """Check verbose=2 does not cause error. """ + # Check verbose=2 does not cause error. from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout @@ -668,7 +666,7 @@ def test_more_verbose_output(): def test_warm_start(): - """Test if warm start equals fit. """ + # Test if warm start equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1) @@ -683,7 +681,7 @@ def test_warm_start(): def test_warm_start_n_estimators(): - """Test if warm start equals fit - set n_estimators. """ + # Test if warm start equals fit - set n_estimators. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=300, max_depth=1) @@ -698,7 +696,7 @@ def test_warm_start_n_estimators(): def test_warm_start_max_depth(): - """Test if possible to fit trees of different depth in ensemble. """ + # Test if possible to fit trees of different depth in ensemble. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -713,7 +711,7 @@ def test_warm_start_max_depth(): def test_warm_start_clear(): - """Test if fit clears state. """ + # Test if fit clears state. 
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) @@ -728,7 +726,7 @@ def test_warm_start_clear(): def test_warm_start_zero_n_estimators(): - """Test if warm start with zero n_estimators raises error """ + # Test if warm start with zero n_estimators raises error X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -738,7 +736,7 @@ def test_warm_start_zero_n_estimators(): def test_warm_start_smaller_n_estimators(): - """Test if warm start with smaller n_estimators raises error """ + # Test if warm start with smaller n_estimators raises error X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -748,7 +746,7 @@ def test_warm_start_smaller_n_estimators(): def test_warm_start_equal_n_estimators(): - """Test if warm start with equal n_estimators does nothing """ + # Test if warm start with equal n_estimators does nothing X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) @@ -762,7 +760,7 @@ def test_warm_start_equal_n_estimators(): def test_warm_start_oob_switch(): - """Test if oob can be turned on during warm start. """ + # Test if oob can be turned on during warm start. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1, warm_start=True) @@ -777,7 +775,7 @@ def test_warm_start_oob_switch(): def test_warm_start_oob(): - """Test if warm start OOB equals fit. """ + # Test if warm start OOB equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1, subsample=0.5, @@ -803,7 +801,7 @@ def early_stopping_monitor(i, est, locals): def test_monitor_early_stopping(): - """Test if monitor return value works. """ + # Test if monitor return value works. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: @@ -839,7 +837,7 @@ def test_monitor_early_stopping(): def test_complete_classification(): - """Test greedy trees with max_depth + 1 leafs. """ + # Test greedy trees with max_depth + 1 leaves. from sklearn.tree._tree import TREE_LEAF X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) k = 4 @@ -855,7 +853,7 @@ def test_complete_classification(): def test_complete_regression(): - """Test greedy trees with max_depth + 1 leafs. """ + # Test greedy trees with max_depth + 1 leaves. from sklearn.tree._tree import TREE_LEAF k = 4 @@ -869,7 +867,7 @@ def test_complete_regression(): def test_zero_estimator_reg(): - """Test if ZeroEstimator works for regression. """ + # Test if ZeroEstimator works for regression. est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init=ZeroEstimator()) est.fit(boston.data, boston.target) @@ -890,7 +888,7 @@ def test_zero_estimator_reg(): def test_zero_estimator_clf(): - """Test if ZeroEstimator works for classification. """ + # Test if ZeroEstimator works for classification. 
X = iris.data y = np.array(iris.target) est = GradientBoostingClassifier(n_estimators=20, max_depth=1, @@ -920,7 +918,7 @@ def test_zero_estimator_clf(): def test_max_leaf_nodes_max_depth(): - """Test preceedence of max_leaf_nodes over max_depth. """ + # Test precedence of max_leaf_nodes over max_depth. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) all_estimators = [GradientBoostingRegressor, GradientBoostingClassifier] @@ -937,10 +935,8 @@ def test_max_leaf_nodes_max_depth(): def test_warm_start_wo_nestimators_change(): - """Test if warm_start does nothing if n_estimators is not changed. - - Regression test for #3513. - """ + # Test if warm_start does nothing if n_estimators is not changed. + # Regression test for #3513. clf = GradientBoostingClassifier(n_estimators=10, warm_start=True) clf.fit([[0, 1], [2, 3]], [0, 1]) assert clf.estimators_.shape[0] == 10 @@ -949,7 +945,7 @@ def test_warm_start_wo_nestimators_change(): def test_probability_exponential(): - """Predict probabilities.""" + # Predict probabilities. clf = GradientBoostingClassifier(loss='exponential', n_estimators=100, random_state=1) diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index 1c811aeba9af4..c2b4806ec496b 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -19,10 +19,8 @@ def test_binomial_deviance(): - """Check binomial deviance loss. - - Check against alternative definitions in ESLII. - """ + # Check binomial deviance loss. + # Check against alternative definitions in ESLII. bd = BinomialDeviance(2) # pred has the same BD for y in {0, 1} @@ -55,7 +53,7 @@ def test_binomial_deviance(): def test_log_odds_estimator(): - """Check log odds estimator. """ + # Check log odds estimator. est = LogOddsEstimator() assert_raises(ValueError, est.fit, None, np.array([1])) @@ -78,7 +76,7 @@ def test_sample_weight_smoke(): def test_sample_weight_init_estimators(): - """Smoke test for init estimators with sample weights. """ + # Smoke test for init estimators with sample weights. rng = check_random_state(13) X = rng.rand(100, 2) sample_weight = np.ones(100) @@ -145,7 +143,7 @@ def test_weighted_percentile_zero_weight(): def test_sample_weight_deviance(): - """Test if deviance supports sample weights. """ + # Test if deviance supports sample weights. 
rng = check_random_state(13) X = rng.rand(100, 2) sample_weight = np.ones(100) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index feebbe2480c0e..cec7efc46f03b 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -28,7 +28,7 @@ def test_partial_dependence_classifier(): - """Test partial dependence for classifier """ + # Test partial dependence for classifier clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(X, y) @@ -48,7 +48,7 @@ def test_partial_dependence_classifier(): def test_partial_dependence_multiclass(): - """Test partial dependence for multi-class classifier """ + # Test partial dependence for multi-class classifier clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(iris.data, iris.target) @@ -63,7 +63,7 @@ def test_partial_dependence_multiclass(): def test_partial_dependence_regressor(): - """Test partial dependence for regressor """ + # Test partial dependence for regressor clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(boston.data, boston.target) @@ -76,7 +76,7 @@ def test_partial_dependence_regressor(): def test_partial_dependecy_input(): - """Test input validation of partial dependence. """ + # Test input validation of partial dependence. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(X, y) @@ -105,7 +105,7 @@ def test_partial_dependecy_input(): @if_matplotlib def test_plot_partial_dependence(): - """Test partial dependence plot function. """ + # Test partial dependence plot function. clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(boston.data, boston.target) @@ -137,7 +137,7 @@ def test_plot_partial_dependence(): @if_matplotlib def test_plot_partial_dependence_input(): - """Test partial dependence plot function input checks. """ + # Test partial dependence plot function input checks. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) # not fitted yet @@ -172,7 +172,7 @@ def test_plot_partial_dependence_input(): @if_matplotlib def test_plot_partial_dependence_multiclass(): - """Test partial dependence plot function on multi-class input. """ + # Test partial dependence plot function on multi-class input. clf = GradientBoostingClassifier(n_estimators=10, random_state=1) clf.fit(iris.data, iris.target) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 608007623d81d..8a5a99dfa5d37 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -44,7 +44,7 @@ def test_classification_toy(): - """Check classification on a toy dataset.""" + # Check classification on a toy dataset. for alg in ['SAMME', 'SAMME.R']: clf = AdaBoostClassifier(algorithm=alg, random_state=0) clf.fit(X, y_class) @@ -55,14 +55,14 @@ def test_classification_toy(): def test_regression_toy(): - """Check classification on a toy dataset.""" + # Check regression on a toy dataset. clf = AdaBoostRegressor(random_state=0) clf.fit(X, y_regr) assert_array_equal(clf.predict(T), y_t_regr) def test_iris(): - """Check consistency on dataset iris.""" + # Check consistency on dataset iris. classes = np.unique(iris.target) clf_samme = prob_samme = None @@ -91,7 +91,7 @@ def test_iris(): def test_boston(): - """Check consistency on dataset boston house prices.""" + # Check consistency on dataset boston house prices. 
clf = AdaBoostRegressor(random_state=0) clf.fit(boston.data, boston.target) score = clf.score(boston.data, boston.target) @@ -99,7 +99,7 @@ def test_boston(): def test_staged_predict(): - """Check staged predictions.""" + # Check staged predictions. rng = np.random.RandomState(0) iris_weights = rng.randint(10, size=iris.target.shape) boston_weights = rng.randint(10, size=boston.target.shape) @@ -143,7 +143,7 @@ def test_staged_predict(): def test_gridsearch(): - """Check that base trees can be grid-searched.""" + # Check that base trees can be grid-searched. # AdaBoost classification boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier()) parameters = {'n_estimators': (1, 2), @@ -162,7 +162,7 @@ def test_gridsearch(): def test_pickle(): - """Check pickability.""" + # Check picklability. import pickle # Adaboost classifier @@ -190,7 +190,7 @@ def test_pickle(): def test_importances(): - """Check variable importances.""" + # Check variable importances. X, y = datasets.make_classification(n_samples=2000, n_features=10, n_informative=3, @@ -211,7 +211,7 @@ def test_importances(): def test_error(): - """Test that it gives proper exception on deficient input.""" + # Test that it gives proper exception on deficient input. assert_raises(ValueError, AdaBoostClassifier(learning_rate=-1).fit, X, y_class) @@ -226,7 +226,7 @@ def test_error(): def test_base_estimator(): - """Test different base estimators.""" + # Test different base estimators. from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC @@ -273,7 +273,7 @@ def test_sample_weight_missing(): def test_sparse_classification(): - """Check classification with sparse input.""" + # Check classification with sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set.""" @@ -371,7 +371,7 @@ def fit(self, X, y, sample_weight=None): def test_sparse_regression(): - """Check regression with sparse input.""" + # Check regression with sparse input. class CustomSVR(SVR): """SVR variant that records the nature of the training set.""" diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 2860eacb35640..c12919762aa14 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -76,13 +76,13 @@ def test_hasher_invalid_input(): def test_hasher_set_params(): - """Test delayed input validation in fit (useful for grid search).""" + # Test delayed input validation in fit (useful for grid search). hasher = FeatureHasher() hasher.set_params(n_features=np.inf) assert_raises(TypeError, hasher.fit) def test_hasher_zeros(): - """Assert that no zeros are materialized in the output.""" + # Assert that no zeros are materialized in the output. X = FeatureHasher().transform([{'foo': 0}]) assert_equal(X.data.shape, (0,)) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 5b67e5f5d7a3a..c1cac567ce4fd 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -561,7 +561,7 @@ def test_vectorizer_max_features(): def test_count_vectorizer_max_features(): - """Regression test: max_features didn't work correctly in 0.14.""" + # Regression test: max_features didn't work correctly in 0.14. 
cv_1 = CountVectorizer(max_features=1) cv_3 = CountVectorizer(max_features=3) @@ -860,8 +860,7 @@ def test_pickling_vectorizer(): def test_stop_words_removal(): - """Ensure that deleting the stop_words_ attribute doesn't affect transform - """ + # Ensure that deleting the stop_words_ attribute doesn't affect transform fitted_vectorizers = ( TfidfVectorizer().fit(JUNK_FOOD_DOCS), diff --git a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py index 06ffdff629a50..baaa4907bad5e 100644 --- a/sklearn/feature_selection/tests/test_chi2.py +++ b/sklearn/feature_selection/tests/test_chi2.py @@ -29,7 +29,7 @@ def mkchi2(k): def test_chi2(): - """Test Chi2 feature extraction""" + # Test Chi2 feature extraction chi2 = mkchi2(k=1).fit(X, y) chi2 = mkchi2(k=1).fit(X, y) @@ -52,24 +52,22 @@ def test_chi2(): def test_chi2_coo(): - """Check that chi2 works with a COO matrix - - (as returned by CountVectorizer, DictVectorizer) - """ + # Check that chi2 works with a COO matrix + # (as returned by CountVectorizer, DictVectorizer) Xcoo = coo_matrix(X) mkchi2(k=2).fit_transform(Xcoo, y) # if we got here without an exception, we're safe def test_chi2_negative(): - """Check for proper error on negative numbers in the input X.""" + # Check for proper error on negative numbers in the input X. X, y = [[0, 1], [-1e-20, 1]], [0, 1] for X in (X, np.array(X), csr_matrix(X)): assert_raises(ValueError, chi2, X, y) def test_chisquare(): - """Test replacement for scipy.stats.chisquare against the original.""" + # Test replacement for scipy.stats.chisquare against the original. obs = np.array([[2., 2.], [1., 1.]]) exp = np.array([[1.5, 1.5], diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 4b91b0b311b03..adc289888fa1f 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -34,7 +34,7 @@ # Test the score functions def test_f_oneway_vs_scipy_stats(): - """Test that our f_oneway gives the same result as scipy.stats""" + # Test that our f_oneway gives the same result as scipy.stats rng = np.random.RandomState(0) X1 = rng.randn(10, 3) X2 = 1 + rng.randn(10, 3) @@ -59,10 +59,8 @@ def test_f_oneway_ints(): def test_f_classif(): - """ - Test whether the F test yields meaningful results - on a simple simulated classification problem - """ + # Test whether the F test yields meaningful results + # on a simple simulated classification problem X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -81,10 +79,8 @@ def test_f_classif(): def test_f_regression(): - """ - Test whether the F test yields meaningful results - on a simple simulated regression problem - """ + # Test whether the F test yields meaningful results + # on a simple simulated regression problem X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -103,10 +99,8 @@ def test_f_regression(): def test_f_regression_input_dtype(): - """ - Test whether f_regression returns the same value - for any numeric data_type - """ + # Test whether f_regression returns the same value + # for any numeric data_type rng = np.random.RandomState(0) X = rng.rand(10, 20) y = np.arange(10).astype(np.int) @@ -118,11 +112,9 @@ def test_f_regression_input_dtype(): def test_f_regression_center(): - """Test whether f_regression preserves dof according to 'center' argument - - We use two 
centered variates so we have a simple relationship between - F-score with variates centering and F-score without variates centering. - """ + # Test whether f_regression preserves dof according to 'center' argument + # We use two centered variates so we have a simple relationship between + # F-score with variates centering and F-score without variates centering. # Create toy example X = np.arange(-5, 6).reshape(-1, 1) # X has zero mean n_samples = X.size @@ -137,10 +129,8 @@ def test_f_regression_center(): def test_f_classif_multi_class(): - """ - Test whether the F test yields meaningful results - on a simple simulated classification problem - """ + # Test whether the F test yields meaningful results + # on a simple simulated classification problem X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -156,11 +146,9 @@ def test_f_classif_multi_class(): def test_select_percentile_classif(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the percentile heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the percentile heuristic X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -179,11 +167,9 @@ def test_select_percentile_classif(): def test_select_percentile_classif_sparse(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the percentile heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the percentile heuristic X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -213,11 +199,9 @@ def test_select_percentile_classif_sparse(): # Test univariate selection in classification settings def test_select_kbest_classif(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the k best heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the k best heuristic X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -236,9 +220,7 @@ def test_select_kbest_classif(): def test_select_kbest_all(): - """ - Test whether k="all" correctly returns all features. - """ + # Test whether k="all" correctly returns all features. X, y = make_classification(n_samples=20, n_features=10, shuffle=False, random_state=0) @@ -248,9 +230,7 @@ def test_select_kbest_all(): def test_select_kbest_zero(): - """ - Test whether k=0 correctly returns no features. - """ + # Test whether k=0 correctly returns no features. 
X, y = make_classification(n_samples=20, n_features=10, shuffle=False, random_state=0) @@ -265,11 +245,9 @@ def test_select_kbest_zero(): def test_select_heuristics_classif(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple classification problem - with the fdr, fwe and fpr heuristics - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple classification problem + # with the fdr, fwe and fpr heuristics X, y = make_classification(n_samples=200, n_features=20, n_informative=3, n_redundant=2, n_repeated=0, n_classes=8, @@ -300,11 +278,9 @@ def assert_best_scores_kept(score_filter): def test_select_percentile_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the percentile heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the percentile heuristic X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -327,10 +303,8 @@ def test_select_percentile_regression(): def test_select_percentile_regression_full(): - """ - Test whether the relative univariate feature selection - selects all features when '100%' is asked. - """ + # Test whether the relative univariate feature selection + # selects all features when '100%' is requested. X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -358,11 +332,9 @@ def test_invalid_percentile(): def test_select_kbest_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the k best heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the k best heuristic X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0, noise=10) @@ -379,11 +351,9 @@ def test_select_kbest_regression(): def test_select_heuristics_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the fpr, fdr or fwe heuristics - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the fpr, fdr or fwe heuristics X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0, noise=10) @@ -401,9 +371,7 @@ def test_select_heuristics_regression(): def test_select_fdr_regression(): - """ - Test that fdr heuristic actually has low FDR. - """ + # Test that fdr heuristic actually has low FDR. 
def single_fdr(alpha, n_informative, random_state): X, y = make_regression(n_samples=150, n_features=20, n_informative=n_informative, shuffle=False, @@ -445,11 +413,9 @@ def single_fdr(alpha, n_informative, random_state): def test_select_fwe_regression(): - """ - Test whether the relative univariate feature selection - gets the correct items in a simple regression problem - with the fwe heuristic - """ + # Test whether the relative univariate feature selection + # gets the correct items in a simple regression problem + # with the fwe heuristic X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0) @@ -466,10 +432,8 @@ def test_select_fwe_regression(): def test_selectkbest_tiebreaking(): - """Test whether SelectKBest actually selects k features in case of ties. - - Prior to 0.11, SelectKBest would return more features than requested. - """ + # Test whether SelectKBest actually selects k features in case of ties. + # Prior to 0.11, SelectKBest would return more features than requested. Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]] y = [1] dummy_score = lambda X, y: (X[0], X[0]) @@ -486,8 +450,7 @@ def test_selectkbest_tiebreaking(): def test_selectpercentile_tiebreaking(): - """Test if SelectPercentile selects the right n_features in case of ties. - """ + # Test if SelectPercentile selects the right n_features in case of ties. Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]] y = [1] dummy_score = lambda X, y: (X[0], X[0]) @@ -504,7 +467,7 @@ def test_selectpercentile_tiebreaking(): def test_tied_pvalues(): - """Test whether k-best and percentiles work with tied pvalues from chi2.""" + # Test whether k-best and percentiles work with tied pvalues from chi2. # chi2 will return the same p-values for the following features, but it # will return different scores. X0 = np.array([[10000, 9999, 9998], [1, 1, 1]]) @@ -522,7 +485,7 @@ def test_tied_pvalues(): def test_tied_scores(): - """Test for stable sorting in k-best with tied scores.""" + # Test for stable sorting in k-best with tied scores. X_train = np.array([[0, 0, 0], [1, 1, 1]]) y_train = [0, 1] @@ -533,7 +496,7 @@ def test_tied_scores(): def test_nans(): - """Assert that SelectKBest and SelectPercentile can handle NaNs.""" + # Assert that SelectKBest and SelectPercentile can handle NaNs. # First feature has zero variance to confuse f_classif (ANOVA) and # make it return a NaN. X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]] @@ -567,7 +530,7 @@ def test_invalid_k(): def test_f_classif_constant_feature(): - """Test that f_classif warns if a feature is constant throughout.""" + # Test that f_classif warns if a feature is constant throughout. X, y = make_classification(n_samples=10, n_features=5) X[:, 0] = 2.0 diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 10cae41f15e45..87af92d7fce15 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -11,7 +11,7 @@ def test_zero_variance(): - """Test VarianceThreshold with default setting, zero variance.""" + # Test VarianceThreshold with default setting, zero variance. for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]: sel = VarianceThreshold().fit(X) @@ -22,7 +22,7 @@ def test_zero_variance(): def test_variance_threshold(): - """Test VarianceThreshold with custom variance.""" + # Test VarianceThreshold with custom variance. 
for X in [data, csr_matrix(data)]: X = VarianceThreshold(threshold=.4).fit_transform(X) assert_equal((len(data), 1), X.shape) diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py index a3c09f4664891..36c041acbbc7f 100644 --- a/sklearn/gaussian_process/tests/test_gaussian_process.py +++ b/sklearn/gaussian_process/tests/test_gaussian_process.py @@ -25,12 +25,9 @@ def test_1d(regr=regression.constant, corr=correlation.squared_exponential, random_start=10, beta0=None): - """ - MLE estimation of a one-dimensional Gaussian Process model. - Check random start optimization. - - Test the interpolating property. - """ + # MLE estimation of a one-dimensional Gaussian Process model. + # Check random start optimization. + # Test the interpolating property. gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0, theta0=1e-2, thetaL=1e-4, thetaU=1e-1, random_start=random_start, verbose=False).fit(X, y) @@ -43,12 +40,9 @@ def test_1d(regr=regression.constant, corr=correlation.squared_exponential, def test_2d(regr=regression.constant, corr=correlation.squared_exponential, random_start=10, beta0=None): - """ - MLE estimation of a two-dimensional Gaussian Process model accounting for - anisotropy. Check random start optimization. - - Test the interpolating property. - """ + # MLE estimation of a two-dimensional Gaussian Process model accounting for + # anisotropy. Check random start optimization. + # Test the interpolating property. b, kappa, e = 5., .5, .1 g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2. X = np.array([[-4.61611719, -6.00099547], @@ -78,12 +72,9 @@ def test_2d(regr=regression.constant, corr=correlation.squared_exponential, def test_2d_2d(regr=regression.constant, corr=correlation.squared_exponential, random_start=10, beta0=None): - """ - MLE estimation of a two-dimensional Gaussian Process model accounting for - anisotropy. Check random start optimization. - - Test the GP interpolation for 2D output - """ + # MLE estimation of a two-dimensional Gaussian Process model accounting for + # anisotropy. Check random start optimization. + # Test the GP interpolation for 2D output b, kappa, e = 5., .5, .1 g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2. f = lambda x: np.vstack((g(x), g(x))).T @@ -113,10 +104,8 @@ def test_wrong_number_of_outputs(): def test_more_builtin_correlation_models(random_start=1): - """ - Repeat test_1d and test_2d for several built-in correlation - models specified as strings. - """ + # Repeat test_1d and test_2d for several built-in correlation + # models specified as strings. all_corr = ['absolute_exponential', 'squared_exponential', 'cubic', 'linear'] @@ -127,10 +116,8 @@ def test_more_builtin_correlation_models(random_start=1): def test_ordinary_kriging(): - """ - Repeat test_1d and test_2d with given regression weights (beta0) for - different regression models (Ordinary Kriging). - """ + # Repeat test_1d and test_2d with given regression weights (beta0) for + # different regression models (Ordinary Kriging). test_1d(regr='linear', beta0=[0., 0.5]) test_1d(regr='quadratic', beta0=[0., 0.5, 0.5]) test_2d(regr='linear', beta0=[0., 0.5, 0.5]) @@ -146,10 +133,8 @@ def test_no_normalize(): def test_random_starts(): - """ - Test that an increasing number of random-starts of GP fitting only - increases the reduced likelihood function of the optimal theta. 
- """ + # Test that an increasing number of random-starts of GP fitting only + # increases the reduced likelihood function of the optimal theta. n_samples, n_features = 50, 3 np.random.seed(0) rng = np.random.RandomState(0) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index cf04c8f205cee..6fd9ef74ccc0d 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -17,9 +17,7 @@ def test_linear_regression(): - """ - Test LinearRegression on a simple dataset. - """ + # Test LinearRegression on a simple dataset. # a simple dataset X = [[1], [2]] Y = [1, 2] @@ -43,9 +41,7 @@ def test_linear_regression(): def test_fit_intercept(): - """ - Test assertions on betas shape. - """ + # Test assertions on betas shape. X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]]) X3 = np.array([[0.27677969, 0.70693172, 0.01628859], @@ -253,7 +249,7 @@ def test_sparse_center_data(): def test_csr_sparse_center_data(): - """Test output format of sparse_center_data, when input is csr""" + # Test output format of sparse_center_data, when input is csr X, y = make_regression() X[X < 2.5] = 0.0 csr = sparse.csr_matrix(X) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 100d50f6fcbf1..3bb09b1a44933 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -14,9 +14,7 @@ def test_bayesian_on_diabetes(): - """ - Test BayesianRidge on diabetes - """ + # Test BayesianRidge on diabetes raise SkipTest("XFailed Test") diabetes = datasets.load_diabetes() X, y = diabetes.data, diabetes.target @@ -37,9 +35,7 @@ def test_bayesian_on_diabetes(): def test_toy_bayesian_ridge_object(): - """ - Test BayesianRidge on toy - """ + # Test BayesianRidge on toy X = np.array([[1], [2], [6], [8], [10]]) Y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(compute_score=True) @@ -51,9 +47,7 @@ def test_toy_bayesian_ridge_object(): def test_toy_ard_object(): - """ - Test BayesianRegression ARD classifier - """ + # Test BayesianRegression ARD classifier X = np.array([[1], [2], [3]]) Y = np.array([1, 2, 3]) clf = ARDRegression(compute_score=True) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index cada4377d1807..21dcd1262ab60 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -33,7 +33,7 @@ def check_warnings(): def test_lasso_zero(): - """Check that the lasso can handle zero data without crashing""" + # Check that the lasso can handle zero data without crashing X = [[0], [0], [0]] y = [0, 0, 0] clf = Lasso(alpha=0.1).fit(X, y) @@ -44,12 +44,9 @@ def test_lasso_zero(): def test_lasso_toy(): - """ - Test Lasso on a toy example for various values of alpha. - - When validating this against glmnet notice that glmnet divides it - against nobs. - """ + # Test Lasso on a toy example for various values of alpha. + # When validating this against glmnet notice that glmnet divides it + # against nobs. X = [[-1], [0], [1]] Y = [-1, 0, 1] # just a straight line @@ -85,14 +82,10 @@ def test_lasso_toy(): def test_enet_toy(): - """ - Test ElasticNet for various parameters of alpha and l1_ratio. - - Actually, the parameters alpha = 0 should not be allowed. However, - we test it as a border case. 
- - ElasticNet is tested with and without precomputed Gram matrix - """ + # Test ElasticNet for various parameters of alpha and l1_ratio. + # Actually, the parameter alpha = 0 should not be allowed. However, + # we test it as a border case. + # ElasticNet is tested with and without precomputed Gram matrix X = np.array([[-1.], [0.], [1.]]) Y = [-1, 0, 1] # just a straight line @@ -540,11 +533,9 @@ def test_warm_start_convergence_with_regularizer_decrement(): def test_random_descent(): - """Test that both random and cyclic selection give the same results. - - Ensure that the test models fully converge and check a wide - range of conditions. - """ + # Test that both random and cyclic selection give the same results. + # Ensure that the test models fully converge and check a wide + # range of conditions. # This uses the coordinate descent algo using the gram trick. X, y, _, _ = build_dataset(n_samples=50, n_features=20) @@ -587,9 +578,7 @@ def test_random_descent(): def test_deprection_precompute_enet(): - """ - Test that setting precompute="auto" gives a Deprecation Warning. - """ + # Test that setting precompute="auto" gives a Deprecation Warning. X, y, _, _ = build_dataset(n_samples=20, n_features=10) clf = ElasticNet(precompute="auto") @@ -599,9 +588,7 @@ def test_deprection_precompute_enet(): def test_enet_path_positive(): - """ - Test that the coefs returned by positive=True in enet_path are positive - """ + # Test that the coefs returned by positive=True in enet_path are positive X, y, _, _ = build_dataset(n_samples=50, n_features=50) for path in [enet_path, lasso_path]: @@ -610,9 +597,7 @@ def test_enet_path_positive(): def test_sparse_dense_descent_paths(): - """ - Test that dense and sparse input give the same input for descent paths. - """ + # Test that dense and sparse input give the same output for descent paths. X, y, _, _ = build_dataset(n_samples=50, n_features=20) csr = sparse.csr_matrix(X) for path in [enet_path, lasso_path]: diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index d7e49d5d25dea..35e446a68c3ac 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -20,9 +20,7 @@ def test_simple(): - """ - Principle of Lars is to keep covariances tied and decreasing - """ + # Principle of Lars is to keep covariances tied and decreasing # also test verbose output from sklearn.externals.six.moves import cStringIO as StringIO @@ -52,9 +50,7 @@ def test_simple(): def test_simple_precomputed(): - """ - The same, with precomputed Gram matrix - """ + # The same, with precomputed Gram matrix G = np.dot(diabetes.data.T, diabetes.data) alphas_, active, coef_path_ = linear_model.lars_path( @@ -74,9 +70,7 @@ def test_simple_precomputed(): def test_all_precomputed(): - """ - Test that lars_path with precomputed Gram and Xy gives the right answer - """ + # Test that lars_path with precomputed Gram and Xy gives the right answer X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) @@ -88,10 +82,8 @@ def test_all_precomputed(): def test_lars_lstsq(): - """ - Test that Lars gives least square solution at the end - of the path - """ + # Test that Lars gives least square solution at the end + # of the path X1 = 3 * diabetes.data # use un-normalized dataset clf = linear_model.LassoLars(alpha=0.) 
clf.fit(X1, y) @@ -100,17 +92,15 @@ def test_lars_lstsq(): def test_lasso_gives_lstsq_solution(): - """ - Test that Lars Lasso gives least square solution at the end - of the path - """ + # Test that Lars Lasso gives least square solution at the end + # of the path alphas_, active, coef_path_ = linear_model.lars_path(X, y, method="lasso") coef_lstsq = np.linalg.lstsq(X, y)[0] assert_array_almost_equal(coef_lstsq, coef_path_[:, -1]) def test_collinearity(): - """Check that lars_path is robust to collinearity in input""" + # Check that lars_path is robust to collinearity in input X = np.array([[3., 3., 1.], [2., 2., 0.], [1., 1., 0]]) @@ -133,9 +123,7 @@ def test_collinearity(): def test_no_path(): - """ - Test that the ``return_path=False`` option returns the correct output - """ + # Test that the ``return_path=False`` option returns the correct output alphas_, active_, coef_path_ = linear_model.lars_path( diabetes.data, diabetes.target, method="lar") @@ -147,9 +135,7 @@ def test_no_path(): def test_no_path_precomputed(): - """ - Test that the ``return_path=False`` option with Gram remains correct - """ + # Test that the ``return_path=False`` option with Gram remains correct G = np.dot(diabetes.data.T, diabetes.data) @@ -164,9 +150,7 @@ def test_no_path_precomputed(): def test_no_path_all_precomputed(): - """ - Test that the ``return_path=False`` option with Gram and Xy remains correct - """ + # Test that the ``return_path=False`` option with Gram and Xy remains correct X, y = 3 * diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) @@ -217,10 +201,8 @@ def test_rank_deficient_design(): def test_lasso_lars_vs_lasso_cd(verbose=False): - """ - Test that LassoLars and Lasso using coordinate descent give the - same results. - """ + # Test that LassoLars and Lasso using coordinate descent give the + # same results. X = 3 * diabetes.data alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') @@ -256,11 +238,9 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): - """ - Test that LassoLars and Lasso using coordinate descent give the - same results when early stopping is used. - (test : before, in the middle, and in the last part of the path) - """ + # Test that LassoLars and Lasso using coordinate descent give the + # same results when early stopping is used. + # (test : before, in the middle, and in the last part of the path) alphas_min = [10, 0.9, 1e-4] for alphas_min in alphas_min: alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', @@ -358,11 +338,8 @@ def objective_function(coef): def test_lars_add_features(): - """ - assure that at least some features get added if necessary - - test for 6d2b4c - """ + # assure that at least some features get added if necessary + # test for 6d2b4c # Hilbert matrix n = 5 H = 1. / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis]) @@ -381,9 +358,7 @@ def test_lars_n_nonzero_coefs(verbose=False): def test_multitarget(): - """ - Assure that estimators receiving multidimensional y do the right thing - """ + # Assure that estimators receiving multidimensional y do the right thing X = diabetes.data Y = np.vstack([diabetes.target, diabetes.target ** 2]).T n_targets = Y.shape[1] @@ -406,12 +381,10 @@ def test_multitarget(): def test_lars_cv(): - """ Test the LassoLarsCV object by checking that the optimal alpha - increases as the number of samples increases. 
- - This property is not actually garantied in general and is just a - property of the given dataset, with the given steps chosen. - """ + # Test the LassoLarsCV object by checking that the optimal alpha + # increases as the number of samples increases. + # This property is not actually guaranteed in general and is just a + # property of the given dataset, with the given steps chosen. old_alpha = 0 lars_cv = linear_model.LassoLarsCV() for length in (400, 200, 100): @@ -423,11 +396,10 @@ def test_lars_cv(): def test_lasso_lars_ic(): - """ Test the LassoLarsIC object by checking that - - some good features are selected. - - alpha_bic > alpha_aic - - n_nonzero_bic < n_nonzero_aic - """ + # Test the LassoLarsIC object by checking that + # - some good features are selected. + # - alpha_bic > alpha_aic + # - n_nonzero_bic < n_nonzero_aic lars_bic = linear_model.LassoLarsIC('bic') lars_aic = linear_model.LassoLarsIC('aic') rng = np.random.RandomState(42) @@ -448,7 +420,7 @@ def test_lasso_lars_ic(): def test_no_warning_for_zero_mse(): - """LassoLarsIC should not warn for log of zero MSE.""" + # LassoLarsIC should not warn for log of zero MSE. y = np.arange(10, dtype=float) X = y.reshape(-1, 1) lars = linear_model.LassoLarsIC(normalize=False) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 40a5d3e000fd5..88fcc68f48256 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -51,10 +51,8 @@ def check_predictions(clf, X, y): def test_predict_2_classes(): - """Simple sanity check on a 2 classes dataset - - Make sure it predicts the correct result on simple datasets. - """ + # Simple sanity check on a 2 classes dataset + # Make sure it predicts the correct result on simple datasets. check_predictions(LogisticRegression(random_state=0), X, Y1) check_predictions(LogisticRegression(random_state=0), X_sp, Y1) @@ -68,7 +66,7 @@ def test_predict_2_classes(): def test_error(): - """Test for appropriate exception on errors""" + # Test for appropriate exception on errors assert_raises(ValueError, LogisticRegression(C=-1).fit, X, Y1) @@ -78,7 +76,7 @@ def test_predict_3_classes(): def test_predict_iris(): - """Test logistic regression with the iris dataset""" + # Test logistic regression with the iris dataset n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] @@ -112,7 +110,7 @@ def test_multinomial_validation(): def test_multinomial_binary(): - """Test multinomial LR on a binary problem.""" + # Test multinomial LR on a binary problem. target = (iris.target > 0).astype(np.intp) target = np.array(["setosa", "not-setosa"])[target] @@ -133,7 +131,7 @@ def test_multinomial_binary(): def test_sparsify(): - """Test sparsify and densify members.""" + # Test sparsify and densify members. 
n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] clf = LogisticRegression(random_state=0).fit(iris.data, target) @@ -156,7 +154,7 @@ def test_sparsify(): def test_inconsistent_input(): - """Test that an exception is raised on inconsistent input""" + # Test that an exception is raised on inconsistent input rng = np.random.RandomState(0) X_ = rng.random_sample((5, 10)) y_ = np.ones(X_.shape[0]) @@ -174,7 +172,7 @@ def test_inconsistent_input(): def test_write_parameters(): - """Test that we can write to coef_ and intercept_""" + # Test that we can write to coef_ and intercept_ clf = LogisticRegression(random_state=0) clf.fit(X, Y1) clf.coef_[:] = 0 @@ -184,17 +182,15 @@ def test_write_parameters(): @raises(ValueError) def test_nan(): - """Test proper NaN handling. - - Regression test for Issue #252: fit used to go into an infinite loop. - """ + # Test proper NaN handling. + # Regression test for Issue #252: fit used to go into an infinite loop. Xnan = np.array(X, dtype=np.float64) Xnan[0, 1] = np.nan LogisticRegression(random_state=0).fit(Xnan, Y1) def test_consistency_path(): - """Test that the path algorithm is consistent""" + # Test that the path algorithm is consistent rng = np.random.RandomState(0) X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2))) y = [1] * 100 + [-1] * 100 @@ -315,7 +311,7 @@ def test_logistic_loss_grad_hess(): def test_logistic_cv(): - """test for LogisticRegressionCV object""" + # test for LogisticRegressionCV object n_samples, n_features = 50, 5 rng = np.random.RandomState(0) X_ref = rng.randn(n_samples, n_features) @@ -388,7 +384,7 @@ def test_intercept_logistic_helper(): def test_ovr_multinomial_iris(): - """Test that OvR and multinomial are correct using the iris dataset.""" + # Test that OvR and multinomial are correct using the iris dataset. train, target = iris.data, iris.target n_samples, n_features = train.shape @@ -491,7 +487,7 @@ def test_logistic_regressioncv_class_weights(): def test_logistic_regression_convergence_warnings(): - """Test that warnings are raised if model does not converge""" + # Test that warnings are raised if model does not converge X, y = make_classification(n_samples=20, n_features=20) clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1) @@ -500,7 +496,7 @@ def test_logistic_regression_convergence_warnings(): def test_logistic_regression_multinomial(): - """Tests for the multinomial option in logistic regression""" + # Tests for the multinomial option in logistic regression # Some basic attributes of Logistic Regression n_samples, n_features, n_classes = 50, 20, 3 @@ -576,13 +572,11 @@ def test_multinomial_loss_grad_hess(): def test_liblinear_decision_function_zero(): - """Test negative prediction when decision_function values are zero. - - Liblinear predicts the positive class when decision_function values - are zero. This is a test to verify that we do not do the same. - See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600 - and the PR https://github.com/scikit-learn/scikit-learn/pull/3623 - """ + # Test negative prediction when decision_function values are zero. + # Liblinear predicts the positive class when decision_function values + # are zero. This is a test to verify that we do not do the same. 
+ # See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600 + # and the PR https://github.com/scikit-learn/scikit-learn/pull/3623 X, y = make_classification(n_samples=5, n_features=5) clf = LogisticRegression(fit_intercept=False) clf.fit(X, y) @@ -593,7 +587,7 @@ def test_liblinear_decision_function_zero(): def test_liblinear_logregcv_sparse(): - """Test LogRegCV with solver='liblinear' works for sparse matrices""" + # Test LogRegCV with solver='liblinear' works for sparse matrices X, y = make_classification(n_samples=10, n_features=5) clf = LogisticRegressionCV(solver='liblinear') diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index c62cec838af87..55d4fb23cfadb 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -89,7 +89,7 @@ def test_classifier_partial_fit(): def test_classifier_refit(): - """Classifier can be retrained on different labels and features.""" + # Classifier can be retrained on different labels and features. clf = PassiveAggressiveClassifier().fit(X, y) assert_array_equal(clf.classes_, np.unique(y)) diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py index 3f202b3bf4000..9870eba987625 100644 --- a/sklearn/linear_model/tests/test_randomized_l1.py +++ b/sklearn/linear_model/tests/test_randomized_l1.py @@ -27,7 +27,7 @@ def test_lasso_stability_path(): - """Check lasso stability path""" + # Check lasso stability path # Load diabetes data and add noisy features scaling = 0.3 coef_grid, scores_path = lasso_stability_path(X, y, scaling=scaling, @@ -39,7 +39,7 @@ def test_lasso_stability_path(): def test_randomized_lasso(): - """Check randomized lasso""" + # Check randomized lasso scaling = 0.3 selection_threshold = 0.5 @@ -76,7 +76,7 @@ def test_randomized_lasso(): def test_randomized_logistic(): - """Check randomized sparse logistic regression""" + # Check randomized sparse logistic regression iris = load_iris() X = iris.data[:, [0, 2]] y = iris.target @@ -102,7 +102,7 @@ def test_randomized_logistic(): def test_randomized_logistic_sparse(): - """Check randomized sparse logistic regression on sparse data""" + # Check randomized sparse logistic regression on sparse data iris = load_iris() X = iris.data[:, [0, 2]] y = iris.target diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 677fa88b70511..0b1ac2487745d 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -48,11 +48,9 @@ def test_ridge(): - """Ridge regression convergence test using score - - TODO: for this test to be robust, we should use a dataset instead - of np.random. - """ + # Ridge regression convergence test using score + # TODO: for this test to be robust, we should use a dataset instead + # of np.random. 
rng = np.random.RandomState(0) alpha = 1.0 @@ -160,8 +158,7 @@ def test_ridge_sample_weights(): def test_ridge_shapes(): - """Test shape of coef_ and intercept_ - """ + # Test shape of coef_ and intercept_ rng = np.random.RandomState(0) n_samples, n_features = 5, 10 X = rng.randn(n_samples, n_features) @@ -185,8 +182,7 @@ def test_ridge_shapes(): def test_ridge_intercept(): - """Test intercept with multiple targets GH issue #708 - """ + # Test intercept with multiple targets GH issue #708 rng = np.random.RandomState(0) n_samples, n_features = 5, 10 X = rng.randn(n_samples, n_features) @@ -204,10 +200,8 @@ def test_ridge_intercept(): def test_toy_ridge_object(): - """Test BayesianRegression ridge classifier - - TODO: test also n_samples > n_features - """ + # Test BayesianRegression ridge classifier + # TODO: test also n_samples > n_features X = np.array([[1], [2]]) Y = np.array([1, 2]) clf = Ridge(alpha=0.0) @@ -228,7 +222,7 @@ def test_toy_ridge_object(): def test_ridge_vs_lstsq(): - """On alpha=0., Ridge and OLS yield the same solution.""" + # On alpha=0., Ridge and OLS yield the same solution. rng = np.random.RandomState(0) # we need more samples than features @@ -249,7 +243,7 @@ def test_ridge_vs_lstsq(): def test_ridge_individual_penalties(): - """Tests the ridge object using individual penalties""" + # Tests the ridge object using individual penalties rng = np.random.RandomState(42) @@ -458,9 +452,7 @@ def test_ridge_sparse_svd(): def test_class_weights(): - """ - Test class weights. - """ + # Test class weights. X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] @@ -496,9 +488,7 @@ def test_class_weights(): def test_class_weights_cv(): - """ - Test class weights for cross validated ridge classifier. - """ + # Test class weights for cross validated ridge classifier. X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y = [1, 1, 1, -1, -1] @@ -514,9 +504,7 @@ def test_class_weights_cv(): def test_ridgecv_store_cv_values(): - """ - Test _RidgeCV's store_cv_values attribute. - """ + # Test _RidgeCV's store_cv_values attribute. rng = rng = np.random.RandomState(42) n_samples = 8 @@ -540,7 +528,7 @@ def test_ridgecv_store_cv_values(): def test_raises_value_error_if_sample_weights_greater_than_1d(): - """Sample weights must be either scalar or 1D""" + # Sample weights must be either scalar or 1D n_sampless = [2, 3] n_featuress = [3, 2] @@ -579,7 +567,7 @@ def fit_ridge_not_ok_2(): def test_sparse_design_with_sample_weights(): - """Sample weights must work with sparse matrices""" + # Sample weights must work with sparse matrices n_sampless = [2, 3] n_featuress = [3, 2] @@ -610,8 +598,8 @@ def test_sparse_design_with_sample_weights(): def test_raises_value_error_if_solver_not_supported(): - """Tests whether a ValueError is raised if a non-identified solver - is passed to ridge_regression""" + # Tests whether a ValueError is raised if a non-identified solver + # is passed to ridge_regression wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)" diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index a5fd6b3c03dd7..0e7d9b4c9952c 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -174,7 +174,7 @@ def test_warm_start_optimal(self): self._test_warm_start(X, Y, "optimal") def test_input_format(self): - """Input format tests. """ + # Input format tests. 
clf = self.factory(alpha=0.01, n_iter=5, shuffle=False) clf.fit(X, Y) @@ -184,7 +184,7 @@ def test_input_format(self): assert_raises(ValueError, clf.fit, X, Y_) def test_clone(self): - """Test whether clone works ok. """ + # Test whether clone works ok. clf = self.factory(alpha=0.01, n_iter=5, penalty='l1') clf = clone(clf) clf.set_params(penalty='l2') @@ -257,9 +257,8 @@ def test_late_onset_averaging_reached(self): class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): """Test suite for the dense representation variant of SGD""" factory_class = SGDClassifier - def test_sgd(self): - """Check that SGD gives any results :-)""" + # Check that SGD gives any results :-) for loss in ("hinge", "squared_hinge", "log", "modified_huber"): clf = self.factory(penalty='l2', alpha=0.01, fit_intercept=True, @@ -270,68 +269,68 @@ def test_sgd(self): @raises(ValueError) def test_sgd_bad_l1_ratio(self): - """Check whether expected ValueError on bad l1_ratio""" + # Check whether expected ValueError on bad l1_ratio self.factory(l1_ratio=1.1) @raises(ValueError) def test_sgd_bad_learning_rate_schedule(self): - """Check whether expected ValueError on bad learning_rate""" + # Check whether expected ValueError on bad learning_rate self.factory(learning_rate="") @raises(ValueError) def test_sgd_bad_eta0(self): - """Check whether expected ValueError on bad eta0""" + # Check whether expected ValueError on bad eta0 self.factory(eta0=0, learning_rate="constant") @raises(ValueError) def test_sgd_bad_alpha(self): - """Check whether expected ValueError on bad alpha""" + # Check whether expected ValueError on bad alpha self.factory(alpha=-.1) @raises(ValueError) def test_sgd_bad_penalty(self): - """Check whether expected ValueError on bad penalty""" + # Check whether expected ValueError on bad penalty self.factory(penalty='foobar', l1_ratio=0.85) @raises(ValueError) def test_sgd_bad_loss(self): - """Check whether expected ValueError on bad loss""" + # Check whether expected ValueError on bad loss self.factory(loss="foobar") @raises(ValueError) def test_sgd_n_iter_param(self): - """Test parameter validity check""" + # Test parameter validity check self.factory(n_iter=-10000) @raises(ValueError) def test_sgd_shuffle_param(self): - """Test parameter validity check""" + # Test parameter validity check self.factory(shuffle="false") @raises(TypeError) def test_argument_coef(self): - """Checks coef_init not allowed as model argument (only fit)""" + # Checks coef_init not allowed as model argument (only fit) # Provided coef_ does not match dataset. self.factory(coef_init=np.zeros((3,))).fit(X, Y) @raises(ValueError) def test_provide_coef(self): - """Checks coef_init shape for the warm starts""" + # Checks coef_init shape for the warm starts # Provided coef_ does not match dataset. self.factory().fit(X, Y, coef_init=np.zeros((3,))) @raises(ValueError) def test_set_intercept(self): - """Checks intercept_ shape for the warm starts""" + # Checks intercept_ shape for the warm starts # Provided intercept_ does not match dataset. self.factory().fit(X, Y, intercept_init=np.zeros((3,))) def test_set_intercept_binary(self): - """Checks intercept_ shape for the warm starts in binary case""" + # Checks intercept_ shape for the warm starts in binary case self.factory().fit(X5, Y5, intercept_init=0) def test_average_binary_computed_correctly(self): - """Checks the SGDClassifier correctly computes the average weights""" + # Checks the SGDClassifier correctly computes the average weights eta = .1 alpha = 2. 
n_samples = 20 @@ -360,7 +359,7 @@ def test_average_binary_computed_correctly(self): assert_almost_equal(clf.intercept_, average_intercept, decimal=14) def test_set_intercept_to_intercept(self): - """Checks intercept_ shape consistency for the warm starts""" + # Checks intercept_ shape consistency for the warm starts # Inconsistent intercept_ shape. clf = self.factory().fit(X5, Y5) self.factory().fit(X5, Y5, intercept_init=clf.intercept_) @@ -369,11 +368,11 @@ def test_set_intercept_to_intercept(self): @raises(ValueError) def test_sgd_at_least_two_labels(self): - """Target must have at least two labels""" + # Target must have at least two labels self.factory(alpha=0.01, n_iter=20).fit(X2, np.ones(9)) def test_partial_fit_weight_class_auto(self): - """partial_fit with class_weight='auto' not supported""" + # partial_fit with class_weight='auto' not supported assert_raises_regexp(ValueError, "class_weight 'auto' is not supported for " "partial_fit. In order to use 'auto' weights, " @@ -387,7 +386,7 @@ def test_partial_fit_weight_class_auto(self): X, Y, classes=np.unique(Y)) def test_sgd_multiclass(self): - """Multi-class test case""" + # Multi-class test case clf = self.factory(alpha=0.01, n_iter=20).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) @@ -398,7 +397,7 @@ def test_sgd_multiclass(self): def test_sgd_multiclass_average(self): eta = .001 alpha = .01 - """Multi-class average test case""" + # Multi-class average test case clf = self.factory(loss='squared_loss', learning_rate='constant', eta0=eta, alpha=alpha, @@ -419,7 +418,7 @@ def test_sgd_multiclass_average(self): def test_sgd_multiclass_with_init_coef(self): - """Multi-class test case""" + # Multi-class test case clf = self.factory(alpha=0.01, n_iter=20) clf.fit(X2, Y2, coef_init=np.zeros((3, 2)), intercept_init=np.zeros(3)) @@ -429,7 +428,7 @@ def test_sgd_multiclass_with_init_coef(self): assert_array_equal(pred, true_result2) def test_sgd_multiclass_njobs(self): - """Multi-class test case with multi-core support""" + # Multi-class test case with multi-core support clf = self.factory(alpha=0.01, n_iter=20, n_jobs=2).fit(X2, Y2) assert_equal(clf.coef_.shape, (3, 2)) assert_equal(clf.intercept_.shape, (3,)) @@ -438,8 +437,8 @@ def test_sgd_multiclass_njobs(self): def test_set_coef_multiclass(self): - """Checks coef_init and intercept_init shape for for multi-class - problems""" + # Checks coef_init and intercept_init shape for multi-class + # problems # Provided coef_ does not match dataset clf = self.factory() assert_raises(ValueError, clf.fit, X2, Y2, coef_init=np.zeros((2, 2))) @@ -456,7 +455,7 @@ def test_set_coef_multiclass(self): clf = self.factory().fit(X2, Y2, intercept_init=np.zeros((3,))) def test_sgd_proba(self): - """Check SGD.predict_proba""" + # Check SGD.predict_proba # Hinge loss does not allow for conditional prob estimate. # We cannot use the factory here, because it defines predict_proba @@ -523,7 +522,7 @@ def test_sgd_proba(self): assert_array_almost_equal(p[0], [1 / 3.] * 3) def test_sgd_l1(self): - """Test L1 regularization""" + # Test L1 regularization n = len(X4) rng = np.random.RandomState(13) idx = np.arange(n) @@ -552,9 +551,7 @@ def test_sgd_l1(self): assert_array_equal(pred, Y) def test_class_weights(self): - """ - Test class weights. - """ + # Test class weights. 
        X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                      [1.0, 1.0], [1.0, 0.0]])
        y = [1, 1, 1, -1, -1]
@@ -574,7 +571,7 @@ def test_class_weights(self):
         assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))

     def test_equal_class_weight(self):
-        """Test if equal class weights approx. equals no class weights. """
+        # Test if equal class weights approx. equal no class weights.
         X = [[1, 0], [1, 0], [0, 1], [0, 1]]
         y = [0, 0, 1, 1]
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight=None)
@@ -591,18 +588,18 @@ def test_equal_class_weight(self):

     @raises(ValueError)
     def test_wrong_class_weight_label(self):
-        """ValueError due to not existing class label."""
+        # ValueError due to a non-existing class label.
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight={0: 0.5})
         clf.fit(X, Y)

     @raises(ValueError)
     def test_wrong_class_weight_format(self):
-        """ValueError due to wrong class_weight argument type."""
+        # ValueError due to wrong class_weight argument type.
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight=[0.5])
         clf.fit(X, Y)

     def test_weights_multiplied(self):
-        """Tests that class_weight and sample_weight are multiplicative"""
+        # Tests that class_weight and sample_weight are multiplicative
         class_weights = {1: .6, 2: .3}
         sample_weights = np.random.random(Y4.shape[0])
         multiplied_together = np.copy(sample_weights)
@@ -618,7 +615,7 @@ def test_weights_multiplied(self):
         assert_almost_equal(clf1.coef_, clf2.coef_)

     def test_auto_weight(self):
-        """Test class weights for imbalanced data"""
+        # Test class weights for imbalanced data
         # compute reference metrics on iris dataset that is quite balanced by
         # default
         X, y = iris.data, iris.target
@@ -669,7 +666,7 @@ def test_auto_weight(self):
         assert_greater(metrics.f1_score(y, y_pred, average='weighted'), 0.96)

     def test_sample_weights(self):
-        """Test weights on individual samples"""
+        # Test weights on individual samples
         X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                       [1.0, 1.0], [1.0, 0.0]])
         y = [1, 1, 1, -1, -1]
@@ -687,7 +684,7 @@ def test_sample_weights(self):

     @raises(ValueError)
     def test_wrong_sample_weights(self):
-        """Test if ValueError is raised if sample_weight has wrong shape"""
+        # Test if a ValueError is raised if sample_weight has the wrong shape
         clf = self.factory(alpha=0.1, n_iter=1000, fit_intercept=False)
         # provided sample_weight too long
         clf.fit(X, Y, sample_weight=np.arange(7))
@@ -734,11 +731,9 @@ def test_partial_fit_multiclass(self):
         assert_true(id1, id2)

     def test_fit_then_partial_fit(self):
-        """Partial_fit should work after initial fit in the multiclass case.
-
-        Non-regression test for #2496; fit would previously produce a
-        Fortran-ordered coef_ that subsequent partial_fit couldn't handle.
-        """
+        # Partial_fit should work after initial fit in the multiclass case.
+        # Non-regression test for #2496; fit would previously produce a
+        # Fortran-ordered coef_ that subsequent partial_fit couldn't handle.
         clf = self.factory()
         clf.fit(X2, Y2)
         clf.partial_fit(X2, Y2)  # no exception here
@@ -794,7 +789,7 @@ def test_warm_start_multiclass(self):
         self._test_warm_start(X2, Y2, "optimal")

     def test_multiple_fit(self):
-        """Test multiple calls of fit w/ different shaped inputs."""
+        # Test multiple calls of fit w/ different shaped inputs.
         clf = self.factory(alpha=0.01, n_iter=5, shuffle=False)
         clf.fit(X, Y)
@@ -820,7 +815,7 @@ class DenseSGDRegressorTestCase(unittest.TestCase, CommonTest):
     factory_class = SGDRegressor

     def test_sgd(self):
-        """Check that SGD gives any results."""
+        # Check that SGD gives any results.
        clf = self.factory(alpha=0.1, n_iter=2, fit_intercept=False)
        clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
@@ -828,16 +823,16 @@ def test_sgd(self):

     @raises(ValueError)
     def test_sgd_bad_penalty(self):
-        """Check whether expected ValueError on bad penalty"""
+        # Check that a ValueError is raised on bad penalty
         self.factory(penalty='foobar', l1_ratio=0.85)

     @raises(ValueError)
     def test_sgd_bad_loss(self):
-        """Check whether expected ValueError on bad loss"""
+        # Check that a ValueError is raised on bad loss
         self.factory(loss="foobar")

     def test_sgd_averaged_computed_correctly(self):
-        """Tests the average regressor matches the naive implementation"""
+        # Tests that the average regressor matches the naive implementation
         eta = .001
         alpha = .01
@@ -865,7 +860,7 @@ def test_sgd_averaged_computed_correctly(self):
         assert_almost_equal(clf.intercept_, average_intercept, decimal=16)

     def test_sgd_averaged_partial_fit(self):
-        """Tests whether the partial fit yields the same average as the fit"""
+        # Tests whether the partial fit yields the same average as the fit
         eta = .001
         alpha = .01
         n_samples = 20
@@ -893,7 +888,7 @@ def test_sgd_averaged_partial_fit(self):
         assert_almost_equal(clf.intercept_[0], average_intercept, decimal=16)

     def test_average_sparse(self):
-        """Checks the average weights on data with 0s"""
+        # Checks the average weights on data with 0s
         eta = .001
         alpha = .01
@@ -989,7 +984,7 @@ def test_sgd_huber_fit(self):
         assert_greater(score, 0.5)

     def test_elasticnet_convergence(self):
-        """Check that the SGD output is consistent with coordinate descent"""
+        # Check that the SGD output is consistent with coordinate descent
         n_samples, n_features = 1000, 5
         rng = np.random.RandomState(0)
@@ -1062,13 +1057,13 @@ def test_loss_function_epsilon(self):


 class SparseSGDRegressorTestCase(DenseSGDRegressorTestCase):
-    """Run exactly the same tests using the sparse representation variant"""
+    # Run exactly the same tests using the sparse representation variant
     factory_class = SparseSGDRegressor


 def test_l1_ratio():
-    """Test if l1 ratio extremes match L1 and L2 penalty settings. """
+    # Test if l1 ratio extremes match L1 and L2 penalty settings.
    X, y = datasets.make_classification(n_samples=1000, n_features=100,
                                        n_informative=20, random_state=1234)
diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
index cafcc3639d9cc..288831bf7411f 100644
--- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -15,7 +15,7 @@


 def test_sparse_coef():
-    """ Check that the sparse_coef propery works """
+    # Check that the sparse_coef property works
     clf = ElasticNet()
     clf.coef_ = [1, 2, 3]

@@ -24,7 +24,7 @@ def test_sparse_coef():


 def test_normalize_option():
-    """ Check that the normalize option in enet works """
+    # Check that the normalize option in enet works
     X = sp.csc_matrix([[-1], [0], [1]])
     y = [-1, 0, 1]
     clf_dense = ElasticNet(fit_intercept=True, normalize=True)
@@ -37,7 +37,7 @@ def test_normalize_option():


 def test_lasso_zero():
-    """Check that the sparse lasso can handle zero data without crashing"""
+    # Check that the sparse lasso can handle zero data without crashing
     X = sp.csc_matrix((3, 1))
     y = [0, 0, 0]
     T = np.array([[1], [2], [3]])
@@ -49,7 +49,7 @@ def test_lasso_zero():


 def test_enet_toy_list_input():
-    """Test ElasticNet for various values of alpha and l1_ratio with list X"""
+    # Test ElasticNet for various values of alpha and l1_ratio with list X

     X = np.array([[-1], [0], [1]])
     X = sp.csc_matrix(X)
@@ -82,8 +82,7 @@ def test_enet_toy_list_input():


 def test_enet_toy_explicit_sparse_input():
-    """Test ElasticNet for various values of alpha and l1_ratio with sparse
-    X"""
+    # Test ElasticNet for various values of alpha and l1_ratio with sparse X
     f = ignore_warnings
     # training samples
     X = sp.lil_matrix((3, 1))
diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py
index 6313def4cf826..846445632cfb0 100644
--- a/sklearn/manifold/tests/test_spectral_embedding.py
+++ b/sklearn/manifold/tests/test_spectral_embedding.py
@@ -45,7 +45,7 @@ def _check_with_col_sign_flipping(A, B, tol=0.0):


 def test_spectral_embedding_two_components(seed=36):
-    """Test spectral embedding with two components"""
+    # Test spectral embedding with two components
     random_state = np.random.RandomState(seed)
     n_sample = 100
     affinity = np.zeros(shape=[n_sample * 2,
@@ -77,7 +77,7 @@ def test_spectral_embedding_two_components(seed=36):


 def test_spectral_embedding_precomputed_affinity(seed=36):
-    """Test spectral embedding with precomputed kernel"""
+    # Test spectral embedding with precomputed kernel
     gamma = 1.0
     se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed",
                                    random_state=np.random.RandomState(seed))
@@ -92,7 +92,7 @@ def test_spectral_embedding_precomputed_affinity(seed=36):


 def test_spectral_embedding_callable_affinity(seed=36):
-    """Test spectral embedding with callable affinity"""
+    # Test spectral embedding with callable affinity
     gamma = 0.9
     kern = rbf_kernel(S, gamma=gamma)
     se_callable = SpectralEmbedding(n_components=2,
@@ -113,7 +113,7 @@ def test_spectral_embedding_callable_affinity(seed=36):


 def test_spectral_embedding_amg_solver(seed=36):
-    """Test spectral embedding with amg solver"""
+    # Test spectral embedding with amg solver
     try:
         from pyamg import smoothed_aggregation_solver
     except ImportError:
@@ -131,7 +131,7 @@ def test_spectral_embedding_amg_solver(seed=36):


 def test_pipeline_spectral_clustering(seed=36):
-    """Test using pipeline to do spectral clustering"""
+    # Test using pipeline to do spectral clustering
    random_state = np.random.RandomState(seed)
    se_rbf = SpectralEmbedding(n_components=n_clusters,
                               affinity="rbf",
@@ -150,7 +150,7 @@ def test_pipeline_spectral_clustering(seed=36):


 def test_spectral_embedding_unknown_eigensolver(seed=36):
-    """Test that SpectralClustering fails with an unknown eigensolver"""
+    # Test that SpectralEmbedding fails with an unknown eigensolver
     se = SpectralEmbedding(n_components=1, affinity="precomputed",
                            random_state=np.random.RandomState(seed),
                            eigen_solver="")
@@ -158,14 +158,14 @@ def test_spectral_embedding_unknown_eigensolver(seed=36):


 def test_spectral_embedding_unknown_affinity(seed=36):
-    """Test that SpectralClustering fails with an unknown affinity type"""
+    # Test that SpectralEmbedding fails with an unknown affinity type
     se = SpectralEmbedding(n_components=1, affinity="",
                            random_state=np.random.RandomState(seed))
     assert_raises(ValueError, se.fit, S)


 def test_connectivity(seed=36):
-    """Test that graph connectivity test works as expected"""
+    # Test that the graph connectivity test works as expected
     graph = np.array([[1, 0, 0, 0, 0],
                       [0, 1, 1, 0, 0],
                       [0, 1, 1, 1, 0],
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index ec857403a5fed..a76f177ae0da7 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -19,7 +19,7 @@


 def test_gradient_descent_stops():
-    """Test stopping conditions of gradient descent."""
+    # Test stopping conditions of gradient descent.
     class ObjectiveSmallGradient:
         def __init__(self):
             self.it = -1
@@ -97,7 +97,7 @@ def flat_function(_):


 def test_binary_search():
-    """Test if the binary search finds Gaussians with desired perplexity."""
+    # Test if the binary search finds Gaussians with desired perplexity.
     random_state = check_random_state(0)
     distances = random_state.randn(50, 2)
     distances = distances.dot(distances.T)
@@ -111,7 +111,7 @@ def test_binary_search():


 def test_gradient():
-    """Test gradient of Kullback-Leibler divergence."""
+    # Test gradient of Kullback-Leibler divergence.
     random_state = check_random_state(0)

     n_samples = 50
@@ -135,7 +135,7 @@ def test_gradient():


 def test_trustworthiness():
-    """Test trustworthiness score."""
+    # Test trustworthiness score.
     random_state = check_random_state(0)

     # Affine transformation
@@ -155,7 +155,7 @@ def test_trustworthiness():


 def test_preserve_trustworthiness_approximately():
-    """Nearest neighbors should be preserved approximately."""
+    # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
     for init in ('random', 'pca'):
@@ -167,7 +167,7 @@ def test_preserve_trustworthiness_approximately():


 def test_fit_csr_matrix():
-    """X can be a sparse matrix."""
+    # X can be a sparse matrix.
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
     X[(np.random.randint(0, 100, 50), np.random.randint(0, 2, 50))] = 0.0
@@ -180,7 +180,7 @@ def test_fit_csr_matrix():


 def test_preserve_trustworthiness_approximately_with_precomputed_distances():
-    """Nearest neighbors should be preserved approximately."""
+    # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
     D = squareform(pdist(X), "sqeuclidean")
@@ -192,41 +192,41 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances():


 def test_early_exaggeration_too_small():
-    """Early exaggeration factor must be >= 1."""
+    # Early exaggeration factor must be >= 1.
    tsne = TSNE(early_exaggeration=0.99)
    assert_raises_regexp(ValueError, "early_exaggeration .*",
                         tsne.fit_transform, np.array([[0.0]]))


 def test_too_few_iterations():
-    """Number of gradient descent iterations must be at least 200."""
+    # Number of gradient descent iterations must be at least 200.
     tsne = TSNE(n_iter=199)
     assert_raises_regexp(ValueError, "n_iter .*", tsne.fit_transform,
                          np.array([[0.0]]))


 def test_non_square_precomputed_distances():
-    """Precomputed distance matrices must be square matrices."""
+    # Precomputed distance matrices must be square matrices.
     tsne = TSNE(metric="precomputed")
     assert_raises_regexp(ValueError, ".* square distance matrix",
                          tsne.fit_transform, np.array([[0.0], [1.0]]))


 def test_init_not_available():
-    """'init' must be 'pca' or 'random'."""
+    # 'init' must be 'pca' or 'random'.
     assert_raises_regexp(ValueError,
                          "'init' must be either 'pca' or 'random'",
                          TSNE, init="not available")


 def test_distance_not_available():
-    """'metric' must be valid."""
+    # 'metric' must be valid.
     tsne = TSNE(metric="not available")
     assert_raises_regexp(ValueError, "Unknown metric not available.*",
                          tsne.fit_transform, np.array([[0.0], [1.0]]))


 def test_pca_initialization_not_compatible_with_precomputed_kernel():
-    """Precomputed distance matrices must be square matrices."""
+    # init="pca" cannot be used with metric="precomputed".
     tsne = TSNE(metric="precomputed", init="pca")
     assert_raises_regexp(ValueError, "The parameter init=\"pca\" cannot be "
                          "used with metric=\"precomputed\".",
@@ -257,7 +257,7 @@ def test_verbose():


 def test_chebyshev_metric():
-    """t-SNE should allow metrics that cannot be squared (issue #3526)."""
+    # t-SNE should allow metrics that cannot be squared (issue #3526).
     random_state = check_random_state(0)
     tsne = TSNE(metric="chebyshev")
     X = random_state.randn(5, 2)
@@ -265,7 +265,7 @@ def test_chebyshev_metric():


 def test_reduction_to_one_component():
-    """t-SNE should allow reduction to one component (issue #4154)."""
+    # t-SNE should allow reduction to one component (issue #4154).
    random_state = check_random_state(0)
    tsne = TSNE(n_components=1)
    X = random_state.randn(5, 2)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 5f70e235a9c33..30b310ca6216c 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -128,7 +128,7 @@ def test_multilabel_accuracy_score_subset_accuracy():


 def test_precision_recall_f1_score_binary():
-    """Test Precision Recall and F1 Score for binary classification task"""
+    # Test Precision Recall and F1 Score for binary classification task
     y_true, y_pred, _ = make_prediction(binary=True)

     # detailed measures for each class
@@ -161,10 +161,9 @@ def test_precision_recall_f1_score_binary():

 @ignore_warnings
 def test_precision_recall_f_binary_single_class():
-    """Test precision, recall and F1 score behave with a single positive or
-    negative class
-
-    Such a case may occur with non-stratified cross-validation"""
+    # Test precision, recall and F1 score behave with a single positive or
+    # negative class
+    # Such a case may occur with non-stratified cross-validation
     assert_equal(1., precision_score([1, 1], [1, 1]))
     assert_equal(1., recall_score([1, 1], [1, 1]))
     assert_equal(1., f1_score([1, 1], [1, 1]))
@@ -175,9 +174,8 @@ def test_precision_recall_f_binary_single_class():


 def test_average_precision_score_score_non_binary_class():
-    """Test that average_precision_score function returns an error when trying
-    to compute average_precision_score for multiclass task.
-    """
+    # Test that average_precision_score function returns an error when trying
+    # to compute average_precision_score for multiclass task.
     rng = check_random_state(404)
     y_pred = rng.rand(10)

@@ -229,7 +227,7 @@ def test_precision_recall_fscore_support_errors():


 def test_confusion_matrix_binary():
-    """Test confusion matrix - binary classification case"""
+    # Test confusion matrix - binary classification case
     y_true, y_pred, _ = make_prediction(binary=True)

     def test(y_true, y_pred):
@@ -257,7 +255,7 @@ def test_matthews_corrcoef_nan():


 def test_precision_recall_f1_score_multiclass():
-    """Test Precision Recall and F1 Score for multiclass classification task"""
+    # Test Precision Recall and F1 Score for multiclass classification task
     y_true, y_pred, _ = make_prediction(binary=False)

     # compute scores with default labels introspection
@@ -326,10 +324,8 @@ def test_precision_refcall_f1_score_multilabel_unordered_labels():


 def test_precision_recall_f1_score_multiclass_pos_label_none():
-    """Test Precision Recall and F1 Score for multiclass classification task
-
-    GH Issue #1296
-    """
+    # Test Precision Recall and F1 Score for multiclass classification task
+    # GH Issue #1296
     # initialize data
     y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
     y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])
@@ -341,7 +337,7 @@ def test_precision_recall_f1_score_multiclass_pos_label_none():


 def test_zero_precision_recall():
-    """Check that pathological cases do not bring NaNs"""
+    # Check that pathological cases do not bring NaNs

     old_error_settings = np.seterr(all='raise')

@@ -361,7 +357,7 @@ def test_zero_precision_recall():


 def test_confusion_matrix_multiclass():
-    """Test confusion matrix - multi-class case"""
+    # Test confusion matrix - multi-class case
     y_true, y_pred, _ = make_prediction(binary=False)

     def test(y_true, y_pred, string_type=False):
@@ -387,7 +383,7 @@ def test(y_true, y_pred, string_type=False):


 def test_confusion_matrix_multiclass_subset_labels():
"""Test confusion matrix - multi-class case with subset of labels""" + # Test confusion matrix - multi-class case with subset of labels y_true, y_pred, _ = make_prediction(binary=False) # compute confusion matrix with only first two labels considered @@ -403,7 +399,7 @@ def test_confusion_matrix_multiclass_subset_labels(): def test_classification_report_multiclass(): - """Test performance report""" + # Test performance report iris = datasets.load_iris() y_true, y_pred, _ = make_prediction(dataset=iris, binary=False) @@ -421,7 +417,6 @@ def test_classification_report_multiclass(): y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names) assert_equal(report, expected_report) - # print classification report with label detection expected_report = """\ precision recall f1-score support @@ -437,7 +432,7 @@ def test_classification_report_multiclass(): def test_classification_report_multiclass_with_digits(): - """Test performance report with added digits in floating point values""" + # Test performance report with added digits in floating point values iris = datasets.load_iris() y_true, y_pred, _ = make_prediction(dataset=iris, binary=False) @@ -455,7 +450,6 @@ def test_classification_report_multiclass_with_digits(): y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names, digits=5) assert_equal(report, expected_report) - # print classification report with label detection expected_report = """\ precision recall f1-score support @@ -526,8 +520,6 @@ def test_classification_report_multiclass_with_unicode_label(): else: report = classification_report(y_true, y_pred) assert_equal(report, expected_report) - - @ignore_warnings # sequence of sequences is deprecated def test_multilabel_classification_report(): n_classes = 4 @@ -537,7 +529,6 @@ def test_multilabel_classification_report(): n_samples=n_samples) _, y_pred_ll = make_ml(n_features=1, n_classes=n_classes, random_state=1, n_samples=n_samples) - expected_report = """\ precision recall f1-score support @@ -653,8 +644,7 @@ def test_multilabel_jaccard_similarity_score(): @ignore_warnings def test_precision_recall_f1_score_multilabel_1(): - """ Test precision_recall_f1_score on a crafted multilabel example - """ + # Test precision_recall_f1_score on a crafted multilabel example # First crafted example y_true_ll = [(0,), (1,), (2, 3)] y_pred_ll = [(1,), (1,), (2, 0)] @@ -730,8 +720,7 @@ def test_precision_recall_f1_score_multilabel_1(): @ignore_warnings def test_precision_recall_f1_score_multilabel_2(): - """ Test precision_recall_f1_score on a crafted multilabel example 2 - """ + # Test precision_recall_f1_score on a crafted multilabel example 2 # Second crafted example y_true_ll = [(1,), (2,), (2, 3)] y_pred_ll = [(4,), (4,), (2, 1)] @@ -1014,8 +1003,7 @@ def test_fscore_warnings(): def test_prf_average_compat(): - """Ensure warning if f1_score et al.'s average is implicit for multiclass - """ + # Ensure warning if f1_score et al.'s average is implicit for multiclass y_true = [1, 2, 3, 3] y_pred = [1, 2, 3, 1] y_true_bin = [0, 1, 1] @@ -1044,8 +1032,8 @@ def test_prf_average_compat(): @ignore_warnings # sequence of sequences is deprecated def test__check_targets(): - """Check that _check_targets correctly merges target types, squeezes - output and fails if input lengths differ.""" + # Check that _check_targets correctly merges target types, squeezes + # output and fails if input lengths differ. 
    IND = 'multilabel-indicator'
    SEQ = 'multilabel-sequences'
    MC = 'multiclass'
@@ -1274,7 +1262,7 @@ def test_log_loss():


 def test_brier_score_loss():
-    """Check brier_score_loss function"""
+    # Check brier_score_loss function
     y_true = np.array([0, 1, 1, 0, 1, 1])
     y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
     true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index d04784f305538..166bc1fbc53c2 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -346,7 +346,7 @@

 @ignore_warnings
 def test_symmetry():
-    """Test the symmetry of score and loss functions"""
+    # Test the symmetry of score and loss functions
     random_state = check_random_state(0)
     y_true = random_state.randint(0, 2, size=(20, ))
     y_pred = random_state.randint(0, 2, size=(20, ))
@@ -513,7 +513,7 @@ def test_format_invariance_with_1d_vectors():

 @ignore_warnings
 def test_invariance_string_vs_numbers_labels():
-    """Ensure that classification metrics with string labels"""
+    # Ensure that classification metrics work with string labels
     random_state = check_random_state(0)
     y1 = random_state.randint(0, 2, size=(20, ))
     y2 = random_state.randint(0, 2, size=(20, ))
@@ -586,12 +586,10 @@ def test_invariance_string_vs_numbers_labels():

 @ignore_warnings
 def check_single_sample(name):
-    """Non-regression test: scores should work with a single sample.
-
-    This is important for leave-one-out cross validation.
-    Score functions tested are those that formerly called np.squeeze,
-    which turns an array of size 1 into a 0-d array (!).
-    """
+    # Non-regression test: scores should work with a single sample.
+    # This is important for leave-one-out cross validation.
+    # Score functions tested are those that formerly called np.squeeze,
+    # which turns an array of size 1 into a 0-d array (!).
     metric = ALL_METRICS[name]

     # assert that no exception is thrown
diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index b5a375be361d2..43a8eaddb84b4 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -40,7 +40,7 @@


 def test_pairwise_distances():
-    """ Test the pairwise_distance helper function. """
+    # Test the pairwise_distances helper function.
     rng = np.random.RandomState(0)
     # Euclidean distance should be equivalent to calling the function.
     X = rng.random_sample((5, 4))
@@ -156,22 +156,20 @@ def test_pairwise_parallel():


 def test_pairwise_callable_nonstrict_metric():
-    """paired_distances should allow callable metric where metric(x, x) != 0
-
-    Knowing that the callable is a strict metric would allow the diagonal to
-    be left uncalculated and set to 0.
-    """
+    # paired_distances should allow callable metric where metric(x, x) != 0
+    # Knowing that the callable is a strict metric would allow the diagonal to
+    # be left uncalculated and set to 0.
     assert_equal(pairwise_distances([[1]], metric=lambda x, y: 5)[0, 0], 5)


 def callable_rbf_kernel(x, y, **kwds):
-    """ Callable version of pairwise.rbf_kernel. """
+    # Callable version of pairwise.rbf_kernel.
     K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds)
     return K


 def test_pairwise_kernels():
-    """ Test the pairwise_kernels helper function. """
+    # Test the pairwise_kernels helper function.

     rng = np.random.RandomState(0)
     X = rng.random_sample((5, 4))
@@ -232,7 +230,7 @@ def test_pairwise_kernels_filter_param():


 def test_paired_distances():
-    """ Test the pairwise_distance helper function.
""" + # Test the pairwise_distance helper function. rng = np.random.RandomState(0) # Euclidean distance should be equivalent to calling the function. X = rng.random_sample((5, 4)) @@ -263,7 +261,7 @@ def test_paired_distances(): def test_pairwise_distances_argmin_min(): - """ Check pairwise minimum distances computation for any metric""" + # Check pairwise minimum distances computation for any metric X = [[0], [1]] Y = [[-1], [2]] @@ -325,7 +323,7 @@ def test_pairwise_distances_argmin_min(): def test_euclidean_distances(): - """ Check the pairwise Euclidean distances computation""" + # Check the pairwise Euclidean distances computation X = [[0]] Y = [[1], [2]] D = euclidean_distances(X, Y) @@ -340,7 +338,7 @@ def test_euclidean_distances(): # Paired distances def test_paired_euclidean_distances(): - """ Check the paired Euclidean distances computation""" + # Check the paired Euclidean distances computation X = [[0], [0]] Y = [[1], [2]] D = paired_euclidean_distances(X, Y) @@ -348,7 +346,7 @@ def test_paired_euclidean_distances(): def test_paired_manhattan_distances(): - """ Check the paired manhattan distances computation""" + # Check the paired manhattan distances computation X = [[0], [0]] Y = [[1], [2]] D = paired_manhattan_distances(X, Y) @@ -411,7 +409,7 @@ def test_chi_square_kernel(): def test_kernel_symmetry(): - """ Valid kernels should be symmetric""" + # Valid kernels should be symmetric rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) for kernel in (linear_kernel, polynomial_kernel, rbf_kernel, @@ -448,7 +446,7 @@ def test_rbf_kernel(): def test_cosine_similarity(): - """ Test the cosine_similarity. """ + # Test the cosine_similarity. rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) @@ -469,7 +467,7 @@ def test_cosine_similarity(): def test_check_dense_matrices(): - """ Ensure that pairwise array check works for dense matrices.""" + # Ensure that pairwise array check works for dense matrices. # Check that if XB is None, XB is returned as reference to XA XA = np.resize(np.arange(40), (5, 8)) XA_checked, XB_checked = check_pairwise_arrays(XA, None) @@ -478,7 +476,7 @@ def test_check_dense_matrices(): def test_check_XB_returned(): - """ Ensure that if XA and XB are given correctly, they return as equal.""" + # Ensure that if XA and XB are given correctly, they return as equal. # Check that if XB is not None, it is returned equal. # Note that the second dimension of XB is the same as XA. XA = np.resize(np.arange(40), (5, 8)) @@ -494,7 +492,7 @@ def test_check_XB_returned(): def test_check_different_dimensions(): - """ Ensure an error is raised if the dimensions are different. """ + # Ensure an error is raised if the dimensions are different. XA = np.resize(np.arange(45), (5, 9)) XB = np.resize(np.arange(32), (4, 8)) assert_raises(ValueError, check_pairwise_arrays, XA, XB) @@ -504,7 +502,7 @@ def test_check_different_dimensions(): def test_check_invalid_dimensions(): - """ Ensure an error is raised on 1D input arrays. """ + # Ensure an error is raised on 1D input arrays. XA = np.arange(45) XB = np.resize(np.arange(32), (4, 8)) assert_raises(ValueError, check_pairwise_arrays, XA, XB) @@ -514,7 +512,7 @@ def test_check_invalid_dimensions(): def test_check_sparse_arrays(): - """ Ensures that checks return valid sparse matrices. """ + # Ensures that checks return valid sparse matrices. 
    rng = np.random.RandomState(0)
    XA = rng.random_sample((5, 4))
    XA_sparse = csr_matrix(XA)
@@ -536,7 +534,7 @@ def test_check_sparse_arrays():


 def tuplify(X):
-    """ Turns a numpy matrix (any n-dimensional array) into tuples."""
+    # Turns a numpy matrix (any n-dimensional array) into tuples.
     s = X.shape
     if len(s) > 1:
         # Tuplify each sub-array in the input.
@@ -547,7 +545,7 @@ def tuplify(X):


 def test_check_tuple_input():
-    """ Ensures that checks return valid tuples. """
+    # Ensures that checks return valid tuples.
     rng = np.random.RandomState(0)
     XA = rng.random_sample((5, 4))
     XA_tuples = tuplify(XA)
@@ -559,7 +557,7 @@ def test_check_tuple_input():


 def test_check_preserve_type():
-    """ Ensures that type float32 is preserved. """
+    # Ensures that type float32 is preserved.
     XA = np.resize(np.arange(40), (5, 8)).astype(np.float32)
     XB = np.resize(np.arange(40), (5, 8)).astype(np.float32)
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 40ce6f659c569..1dd4d385f93b7 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -123,7 +123,7 @@ def _average_precision(y_true, y_score):


 def test_roc_curve():
-    """Test Area under Receiver Operating Characteristic (ROC) curve"""
+    # Test Area under Receiver Operating Characteristic (ROC) curve
     y_true, _, probas_pred = make_prediction(binary=True)

     fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
@@ -149,7 +149,7 @@ def test_roc_curve_end_points():


 def test_roc_returns_consistency():
-    """Test whether the returned threshold matches up with tpr"""
+    # Test whether the returned threshold matches up with tpr
     # make small toy dataset
     y_true, _, probas_pred = make_prediction(binary=True)
     fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
@@ -168,10 +168,8 @@ def test_roc_returns_consistency():


 def test_roc_nonrepeating_thresholds():
-    """Test to ensure that we don't return spurious repeating thresholds.
-
-    Duplicated thresholds can arise due to machine precision issues.
-    """
+    # Test to ensure that we don't return spurious repeating thresholds.
+    # Duplicated thresholds can arise due to machine precision issues.
     dataset = datasets.load_digits()
     X = dataset['data']
     y = dataset['target']
@@ -193,14 +191,14 @@ def test_roc_nonrepeating_thresholds():


 def test_roc_curve_multi():
-    """roc_curve not applicable for multi-class problems"""
+    # roc_curve not applicable for multi-class problems
     y_true, _, probas_pred = make_prediction(binary=False)

     assert_raises(ValueError, roc_curve, y_true, probas_pred)


 def test_roc_curve_confidence():
-    """roc_curve for confidence scores"""
+    # roc_curve for confidence scores
     y_true, _, probas_pred = make_prediction(binary=True)

     fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5)
@@ -211,7 +209,7 @@ def test_roc_curve_confidence():


 def test_roc_curve_hard():
-    """roc_curve for hard decisions"""
+    # roc_curve for hard decisions
     y_true, pred, probas_pred = make_prediction(binary=True)

     # always predict one
@@ -350,7 +348,7 @@ def test_roc_curve_toydata():


 def test_auc():
-    """Test Area Under Curve (AUC) computation"""
+    # Test Area Under Curve (AUC) computation
     x = [0, 1]
     y = [0, 1]
     assert_array_almost_equal(auc(x, y), 0.5)
@@ -396,9 +394,8 @@ def test_auc_errors():


 def test_auc_score_non_binary_class():
-    """Test that roc_auc_score function returns an error when trying
-    to compute AUC for non-binary class values.
-    """
+    # Test that roc_auc_score function returns an error when trying
+    # to compute AUC for non-binary class values.
    rng = check_random_state(404)
    y_pred = rng.rand(10)
    # y_true contains only one class value
@@ -473,7 +470,7 @@ def test_precision_recall_curve_pos_label():


 def _test_precision_recall_curve(y_true, probas_pred):
-    """Test Precision-Recall and aread under PR curve"""
+    # Test Precision-Recall and area under the PR curve
     p, r, thresholds = precision_recall_curve(y_true, probas_pred)
     precision_recall_auc = auc(r, p)
     assert_array_almost_equal(precision_recall_auc, 0.85, 2)
@@ -618,7 +615,7 @@ def test_score_scale_invariance():


 def check_lrap_toy(lrap_score):
-    """Check on several small example that it works """
+    # Check on several small examples that it works
     assert_almost_equal(lrap_score([[0, 1]], [[0.25, 0.75]]), 1)
     assert_almost_equal(lrap_score([[0, 1]], [[0.75, 0.25]]), 1 / 2)
     assert_almost_equal(lrap_score([[1, 1]], [[0.75, 0.25]]), 1)
@@ -714,7 +711,7 @@ def check_lrap_error_raised(lrap_score):


 def check_lrap_only_ties(lrap_score):
-    """Check tie handling in score"""
+    # Check tie handling in score
     # Basic check with only ties and increasing label space
     for n_labels in range(2, 10):
         y_score = np.ones((1, n_labels))
@@ -730,7 +727,7 @@ def check_lrap_only_ties(lrap_score):


 def check_lrap_without_tie_and_increasing_score(lrap_score):
-    """ Check that Label ranking average precision works for various"""
+    # Check that Label ranking average precision works for various inputs
     # Basic check with increasing label space size and decreasing score
     for n_labels in range(2, 10):
         y_score = n_labels - (np.arange(n_labels).reshape((1, n_labels)) + 1)
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 3c97dd66e7c02..5a2523bf1dcf7 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -83,7 +83,7 @@ def __call__(self, est, X, y):


 def test_check_scoring():
-    """Test all branches of check_scoring"""
+    # Test all branches of check_scoring
     estimator = EstimatorWithoutFit()
     pattern = (r"estimator should a be an estimator implementing 'fit' method,"
                r" .* was passed")
@@ -134,14 +134,14 @@ def test_check_scoring_gridsearchcv():


 def test_make_scorer():
-    """Sanity check on the make_scorer factory function."""
+    # Sanity check on the make_scorer factory function.
     f = lambda *args: 0
     assert_raises(ValueError, make_scorer, f,
                   needs_threshold=True, needs_proba=True)


 def test_classification_scores():
-    """Test classification scorers."""
+    # Test classification scorers.
     X, y = make_blobs(random_state=0, centers=2)
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
     clf = LinearSVC(random_state=0)
@@ -185,7 +185,7 @@ def test_classification_scores():


 def test_regression_scorers():
-    """Test regression scorers."""
+    # Test regression scorers.
     diabetes = load_diabetes()
     X, y = diabetes.data, diabetes.target
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
@@ -197,7 +197,7 @@ def test_regression_scorers():


 def test_thresholded_scorers():
-    """Test scorers that take thresholds."""
+    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
@@ -227,9 +227,8 @@ def test_thresholded_scorers():


 def test_thresholded_scorers_multilabel_indicator_data():
-    """Test that the scorer work with multilabel-indicator format
-    for multilabel and multi-output multi-class classifier
-    """
+    # Test that the scorers work with the multilabel-indicator format
+    # for multilabel and multi-output multi-class classifiers
     X, y = make_multilabel_classification(return_indicator=True,
                                           allow_unlabeled=False,
                                           random_state=0)
@@ -272,7 +271,7 @@ def test_thresholded_scorers_multilabel_indicator_data():


 def test_unsupervised_scorers():
-    """Test clustering scorers against gold standard labeling."""
+    # Test clustering scorers against gold standard labeling.
     # We don't have any real unsupervised Scorers yet.
     X, y = make_blobs(random_state=0, centers=2)
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
@@ -285,7 +284,7 @@ def test_unsupervised_scorers():

 @ignore_warnings
 def test_raises_on_score_list():
-    """Test that when a list of scores is returned, we raise proper errors."""
+    # Test that when a list of scores is returned, we raise proper errors.
     X, y = make_blobs(random_state=0)
     f1_scorer_no_average = make_scorer(f1_score, average=None)
     clf = DecisionTreeClassifier()
@@ -298,7 +297,7 @@ def test_raises_on_score_list():

 @ignore_warnings
 def test_scorer_sample_weight():
-    """Test that scorers support sample_weight or raise sensible errors"""
+    # Test that scorers support sample_weight or raise sensible errors

     # Unlike the metrics invariance test, in the scorer case it's harder
     # to ensure that, on the classifier output, weighted and unweighted
diff --git a/sklearn/mixture/tests/test_gmm.py b/sklearn/mixture/tests/test_gmm.py
index 984218c4fae24..4f41e174aedb6 100644
--- a/sklearn/mixture/tests/test_gmm.py
+++ b/sklearn/mixture/tests/test_gmm.py
@@ -13,10 +13,8 @@


 def test_sample_gaussian():
-    """
-    Test sample generation from mixture.sample_gaussian where covariance
-    is diagonal, spherical and full
-    """
+    # Test sample generation from mixture.sample_gaussian where covariance
+    # is diagonal, spherical and full

     n_features, n_samples = 2, 300
     axis = 1
@@ -65,11 +63,9 @@ def _naive_lmvnpdf_diag(X, mu, cv):


 def test_lmvnpdf_diag():
-    """
-    test a slow and naive implementation of lmvnpdf and
-    compare it to the vectorized version (mixture.lmvnpdf) to test
-    for correctness
-    """
+    # Test a slow and naive implementation of lmvnpdf and
+    # compare it to the vectorized version (mixture.lmvnpdf) to test
+    # for correctness
     n_features, n_components, n_samples = 2, 3, 10
     mu = rng.randint(10) * rng.rand(n_components, n_features)
     cv = (rng.rand(n_components, n_features) + 1.0) ** 2
@@ -237,8 +233,7 @@ def test_train(self, params='wmc'):
                         % (delta_min, self.threshold, self.covariance_type, trainll))

     def test_train_degenerate(self, params='wmc'):
-        """ Train on degenerate data with 0 in some dimensions
-        """
+        # Train on degenerate data with 0 in some dimensions
         # Create a training set by sampling from the predefined distribution.
         X = rng.randn(100, self.n_features)
         X.T[1:] = 0
@@ -250,8 +245,7 @@ def test_train_degenerate(self, params='wmc'):
         self.assertTrue(np.sum(np.abs(trainll / 100 / X.shape[1])) < 5)

     def test_train_1d(self, params='wmc'):
-        """ Train on 1-D data
-        """
+        # Train on 1-D data
         # Create a training set by sampling from the predefined distribution.
        X = rng.randn(100, 1)
        #X.T[1:] = 0
@@ -294,7 +288,7 @@ class TestGMMWithFullCovars(unittest.TestCase, GMMTester):


 def test_multiple_init():
-    """Test that multiple inits does not much worse than a single one"""
+    # Test that multiple inits do not perform much worse than a single one
     X = rng.randn(30, 5)
     X[:10] += 2
     g = mixture.GMM(n_components=2, covariance_type='spherical',
@@ -306,7 +300,7 @@ def test_multiple_init():


 def test_n_parameters():
-    """Test that the right number of parameters is estimated"""
+    # Test that the right number of parameters is estimated
     n_samples, n_dim, n_components = 7, 5, 2
     X = rng.randn(n_samples, n_dim)
     n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41}
@@ -318,10 +312,8 @@ def test_n_parameters():


 def test_1d_1component():
-    """
-    Test all of the covariance_types return the same BIC score for
-    1-dimensional, 1 component fits.
-    """
+    # Test that all of the covariance_types return the same BIC score for
+    # 1-dimensional, 1-component fits.
     n_samples, n_dim, n_components = 100, 1, 1
     X = rng.randn(n_samples, n_dim)
     g_full = mixture.GMM(n_components=n_components, covariance_type='full',
@@ -336,7 +328,7 @@ def test_1d_1component():


 def test_aic():
-    """ Test the aic and bic criteria"""
+    # Test the aic and bic criteria
     n_samples, n_dim, n_components = 50, 3, 2
     X = rng.randn(n_samples, n_dim)
     SGH = 0.5 * (X.var() + np.log(2 * np.pi))  # standard gaussian entropy
diff --git a/sklearn/neighbors/tests/test_approximate.py b/sklearn/neighbors/tests/test_approximate.py
index 8612e5bef5c62..50794e263c831 100644
--- a/sklearn/neighbors/tests/test_approximate.py
+++ b/sklearn/neighbors/tests/test_approximate.py
@@ -27,7 +27,7 @@


 def test_neighbors_accuracy_with_n_candidates():
-    """Checks whether accuracy increases as `n_candidates` increases."""
+    # Checks whether accuracy increases as `n_candidates` increases.
     n_candidates_values = np.array([.1, 50, 500])
     n_samples = 100
     n_features = 10
@@ -61,7 +61,7 @@ def test_neighbors_accuracy_with_n_candidates():


 def test_neighbors_accuracy_with_n_estimators():
-    """Checks whether accuracy increases as `n_estimators` increases."""
+    # Checks whether accuracy increases as `n_estimators` increases.
     n_estimators = np.array([1, 10, 100])
     n_samples = 100
     n_features = 10
@@ -96,12 +96,10 @@ def test_neighbors_accuracy_with_n_estimators():

 @ignore_warnings
 def test_kneighbors():
-    """Checks whether desired number of neighbors are returned.
-
-    It is guaranteed to return the requested number of neighbors
-    if `min_hash_match` is set to 0. Returned distances should be
-    in ascending order.
-    """
+    # Checks whether the desired number of neighbors is returned.
+    # It is guaranteed to return the requested number of neighbors
+    # if `min_hash_match` is set to 0. Returned distances should be
+    # in ascending order.
     n_samples = 12
     n_features = 2
     n_iter = 10
@@ -147,14 +145,12 @@ def test_kneighbors():


 def test_radius_neighbors():
-    """Checks whether Returned distances are less than `radius`
-
-    At least one point should be returned when the `radius` is set
-    to mean distance from the considering point to other points in
-    the database.
-    Moreover, this test compares the radius neighbors of LSHForest
-    with the `sklearn.neighbors.NearestNeighbors`.
-    """
+    # Checks whether returned distances are less than `radius`
+    # At least one point should be returned when the `radius` is set
+    # to the mean distance from the query point to other points in
+    # the database.
+    # Moreover, this test compares the radius neighbors of LSHForest
+    # with those of `sklearn.neighbors.NearestNeighbors`.
     n_samples = 12
     n_features = 2
     n_iter = 10
@@ -281,7 +277,7 @@ def test_radius_neighbors_boundary_handling():


 def test_distances():
-    """Checks whether returned neighbors are from closest to farthest."""
+    # Checks whether returned neighbors are from closest to farthest.
     n_samples = 12
     n_features = 2
     n_iter = 10
@@ -307,7 +303,7 @@ def test_distances():


 def test_fit():
-    """Checks whether `fit` method sets all attribute values correctly."""
+    # Checks whether `fit` method sets all attribute values correctly.
     n_samples = 12
     n_features = 2
     n_estimators = 5
@@ -334,10 +330,8 @@ def test_fit():


 def test_partial_fit():
-    """Checks whether inserting array is consitent with fitted data.
-
-    `partial_fit` method should set all attribute values correctly.
-    """
+    # Checks whether inserting an array is consistent with fitted data.
+    # `partial_fit` method should set all attribute values correctly.
     n_samples = 12
     n_samples_partial_fit = 3
     n_features = 2
@@ -371,13 +365,11 @@ def test_partial_fit():


 def test_hash_functions():
-    """Checks randomness of hash functions.
-
-    Variance and mean of each hash function (projection vector)
-    should be different from flattened array of hash functions.
-    If hash functions are not randomly built (seeded with
-    same value), variances and means of all functions are equal.
-    """
+    # Checks randomness of hash functions.
+    # Variance and mean of each hash function (projection vector)
+    # should be different from flattened array of hash functions.
+    # If hash functions are not randomly built (seeded with
+    # same value), variances and means of all functions are equal.
     n_samples = 12
     n_features = 2
     n_estimators = 5
@@ -402,12 +394,10 @@ def test_hash_functions():


 def test_candidates():
-    """Checks whether candidates are sufficient.
-
-    This should handle the cases when number of candidates is 0.
-    User should be warned when number of candidates is less than
-    requested number of neighbors.
-    """
+    # Checks whether candidates are sufficient.
+    # This should handle the cases when number of candidates is 0.
+    # User should be warned when number of candidates is less than
+    # requested number of neighbors.
     X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1],
                         [6, 10, 2]], dtype=np.float32)
     X_test = np.array([7, 10, 3], dtype=np.float32)
@@ -442,7 +432,7 @@ def test_candidates():


 def test_graphs():
-    """Smoke tests for graph methods."""
+    # Smoke tests for graph methods.
    n_samples_sizes = [5, 10, 20]
    n_features = 3
    rng = np.random.RandomState(42)
diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py
index 0c248fdadb7ce..7a26af1833665 100644
--- a/sklearn/neighbors/tests/test_ball_tree.py
+++ b/sklearn/neighbors/tests/test_ball_tree.py
@@ -177,7 +177,7 @@ def check_results(kernel, h, atol, rtol, breadth_first):


 def test_gaussian_kde(n_samples=1000):
-    """Compare gaussian KDE results to scipy.stats.gaussian_kde"""
+    # Compare gaussian KDE results to scipy.stats.gaussian_kde
     from scipy.stats import gaussian_kde
     np.random.seed(0)
     x_in = np.random.normal(0, 1, n_samples)
diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py
index c949f1035216a..8bccc46087996 100644
--- a/sklearn/neighbors/tests/test_kd_tree.py
+++ b/sklearn/neighbors/tests/test_kd_tree.py
@@ -133,7 +133,7 @@ def check_results(kernel, h, atol, rtol, breadth_first):


 def test_gaussian_kde(n_samples=1000):
-    """Compare gaussian KDE results to scipy.stats.gaussian_kde"""
+    # Compare gaussian KDE results to scipy.stats.gaussian_kde
     from scipy.stats import gaussian_kde
     np.random.seed(0)
     x_in = np.random.normal(0, 1, n_samples)
diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py
index 132ae3171d06c..c1c63288d6473 100644
--- a/sklearn/neighbors/tests/test_kde.py
+++ b/sklearn/neighbors/tests/test_kde.py
@@ -90,7 +90,7 @@ def test_kernel_density_sampling(n_samples=100, n_features=3):


 def test_kde_algorithm_metric_choice():
-    """Smoke test for various metrics and algorithms"""
+    # Smoke test for various metrics and algorithms
     rng = np.random.RandomState(0)
     X = rng.randn(10, 2)    # 2 features required for haversine dist.
     Y = rng.randn(10, 2)
diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py
index 9922434c3b869..8ecb226edef17 100644
--- a/sklearn/neighbors/tests/test_nearest_centroid.py
+++ b/sklearn/neighbors/tests/test_nearest_centroid.py
@@ -29,7 +29,7 @@


 def test_classification_toy():
-    """Check classification on a toy dataset, including sparse versions."""
+    # Check classification on a toy dataset, including sparse versions.
     clf = NearestCentroid()
     clf.fit(X, y)
     assert_array_equal(clf.predict(T), true_result)
@@ -63,7 +63,7 @@ def test_precomputed():


 def test_iris():
-    """Check consistency on dataset iris."""
+    # Check consistency on dataset iris.
     for metric in ('euclidean', 'cosine'):
         clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
         score = np.mean(clf.predict(iris.data) == iris.target)
@@ -71,7 +71,7 @@ def test_iris():


 def test_iris_shrinkage():
-    """Check consistency on dataset iris, when using shrinkage."""
+    # Check consistency on dataset iris, when using shrinkage.
     for metric in ('euclidean', 'cosine'):
         for shrink_threshold in [None, 0.1, 0.5]:
             clf = NearestCentroid(metric=metric,
@@ -109,7 +109,7 @@ def test_shrinkage_threshold_decoded_y():


 def test_predict_translated_data():
-    """Test that NearestCentroid gives same results on translated data"""
+    # Test that NearestCentroid gives same results on translated data
     rng = np.random.RandomState(0)
     X = rng.rand(50, 50)

@@ -126,7 +126,7 @@ def test_predict_translated_data():


 def test_manhattan_metric():
-    """Test the manhattan metric."""
+    # Test the manhattan metric.
    clf = NearestCentroid(metric='manhattan')

    clf.fit(X, y)
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 270b470a6d3fa..494f3fe11f14d 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -56,7 +56,7 @@ def _weight_func(dist):

 def test_unsupervised_kneighbors(n_samples=20, n_features=5,
                                  n_query_pts=2, n_neighbors=5):
-    """Test unsupervised neighbors methods"""
+    # Test unsupervised neighbors methods
     X = rng.rand(n_samples, n_features)

     test = rng.rand(n_query_pts, n_features)
@@ -82,7 +82,7 @@ def test_unsupervised_kneighbors(n_samples=20, n_features=5,


 def test_unsupervised_inputs():
-    """test the types of valid input into NearestNeighbors"""
+    # Test the types of valid input into NearestNeighbors
     X = rng.random_sample((10, 3))

     nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
@@ -103,7 +103,7 @@ def test_unsupervised_inputs():

 def test_unsupervised_radius_neighbors(n_samples=20, n_features=5,
                                        n_query_pts=2, radius=0.5,
                                        random_state=0):
-    """Test unsupervised radius-based query"""
+    # Test unsupervised radius-based query
     rng = np.random.RandomState(random_state)

     X = rng.rand(n_samples, n_features)
@@ -146,7 +146,7 @@ def test_kneighbors_classifier(n_samples=40,
                                n_test_pts=10,
                                n_neighbors=5,
                                random_state=0):
-    """Test k-neighbors classification"""
+    # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
@@ -172,7 +172,7 @@ def test_kneighbors_classifier(n_samples=40,

 def test_kneighbors_classifier_float_labels(n_samples=40, n_features=5,
                                             n_test_pts=10, n_neighbors=5,
                                             random_state=0):
-    """Test k-neighbors classification"""
+    # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
@@ -185,7 +185,7 @@ def test_kneighbors_classifier_float_labels(n_samples=40, n_features=5,


 def test_kneighbors_classifier_predict_proba():
-    """Test KNeighborsClassifier.predict_proba() method"""
+    # Test KNeighborsClassifier.predict_proba() method
     X = np.array([[0, 2, 0],
                   [0, 2, 1],
                   [2, 0, 0],
@@ -221,7 +221,7 @@ def test_radius_neighbors_classifier(n_samples=40,
                                      n_test_pts=10,
                                      radius=0.5,
                                      random_state=0):
-    """Test radius-based classification"""
+    # Test radius-based classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
@@ -244,8 +244,8 @@ def test_radius_neighbors_classifier(n_samples=40,


 def test_radius_neighbors_classifier_when_no_neighbors():
-    """ Test radius-based classifier when no neighbors found.
-    In this case it should rise an informative exception """
+    # Test radius-based classifier when no neighbors found.
+    # In this case it should raise an informative exception
     X = np.array([[1.0, 1.0], [2.0, 2.0]])
     y = np.array([1, 2])
@@ -273,8 +273,8 @@ def test_radius_neighbors_classifier_when_no_neighbors():


 def test_radius_neighbors_classifier_outlier_labeling():
-    """ Test radius-based classifier when no neighbors found and outliers
-    are labeled. """
+    # Test radius-based classifier when no neighbors found and outliers
+    # are labeled.
    X = np.array([[1.0, 1.0], [2.0, 2.0]])
    y = np.array([1, 2])
@@ -299,7 +299,7 @@ def test_radius_neighbors_classifier_outlier_labeling():


 def test_radius_neighbors_classifier_zero_distance():
-    """ Test radius-based classifier, when distance to a sample is zero. """
+    # Test radius-based classifier, when distance to a sample is zero.
     X = np.array([[1.0, 1.0], [2.0, 2.0]])
     y = np.array([1, 2])
@@ -320,7 +320,7 @@ def test_radius_neighbors_classifier_zero_distance():


 def test_neighbors_regressors_zero_distance():
-    """ Test radius-based regressor, when distance to a sample is zero. """
+    # Test radius-based regressor, when distance to a sample is zero.
     X = np.array([[1.0, 1.0], [1.0, 1.0], [2.0, 2.0], [2.5, 2.5]])
     y = np.array([1.0, 1.5, 2.0, 0.0])
@@ -371,7 +371,7 @@ def test_radius_neighbors_boundary_handling():


 def test_RadiusNeighborsClassifier_multioutput():
-    """Test k-NN classifier on multioutput data"""
+    # Test radius-based classifier on multioutput data
     rng = check_random_state(0)
     n_features = 2
     n_samples = 40
@@ -411,7 +411,7 @@ def test_kneighbors_classifier_sparse(n_samples=40,
                                       n_test_pts=10,
                                       n_neighbors=5,
                                       random_state=0):
-    """Test k-NN classifier on sparse matrices"""
+    # Test k-NN classifier on sparse matrices
     # Like the above, but with various types of sparse matrices
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
@@ -430,7 +430,7 @@ def test_kneighbors_classifier_sparse(n_samples=40,


 def test_KNeighborsClassifier_multioutput():
-    """Test k-NN classifier on multioutput data"""
+    # Test k-NN classifier on multioutput data
     rng = check_random_state(0)
     n_features = 5
     n_samples = 50
@@ -480,7 +480,7 @@ def test_kneighbors_regressor(n_samples=40,
                               n_test_pts=10,
                               n_neighbors=3,
                               random_state=0):
-    """Test k-neighbors regression"""
+    # Test k-neighbors regression
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = np.sqrt((X ** 2).sum(1))
@@ -502,7 +502,7 @@ def test_kneighbors_regressor(n_samples=40,


 def test_KNeighborsRegressor_multioutput_uniform_weight():
-    """Test k-neighbors in multi-output regression with uniform weight"""
+    # Test k-neighbors in multi-output regression with uniform weight
     rng = check_random_state(0)
     n_features = 5
     n_samples = 40
@@ -533,7 +533,7 @@ def test_kneighbors_regressor_multioutput(n_samples=40,
                                          n_test_pts=10,
                                          n_neighbors=3,
                                          random_state=0):
-    """Test k-neighbors in multi-output regression"""
+    # Test k-neighbors in multi-output regression
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = np.sqrt((X ** 2).sum(1))
@@ -560,7 +560,7 @@ def test_radius_neighbors_regressor(n_samples=40,
                                     n_test_pts=10,
                                     radius=0.5,
                                     random_state=0):
-    """Test radius-based neighbors regression"""
+    # Test radius-based neighbors regression
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = np.sqrt((X ** 2).sum(1))
@@ -582,7 +582,7 @@ def test_radius_neighbors_regressor(n_samples=40,


 def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight():
-    """Test radius neighbors in multi-output regression (uniform weight)"""
+    # Test radius neighbors in multi-output regression (uniform weight)
     rng = check_random_state(0)

     n_features = 5
@@ -616,7 +616,7 @@ def test_RadiusNeighborsRegressor_multioutput(n_samples=40,
                                               n_test_pts=10,
                                               n_neighbors=3,
                                               random_state=0):
-    """Test k-neighbors in multi-output regression with various weight"""
+    # Test radius-based neighbors in multi-output regression with various weights
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
@@ -643,7 +643,7 @@ def test_kneighbors_regressor_sparse(n_samples=40,
                                      n_test_pts=10,
                                      n_neighbors=5,
                                      random_state=0):
-    """Test radius-based regression on sparse matrices"""
+    # Test k-neighbors regression on sparse matrices
     # Like the above, but with various types of sparse matrices
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features) - 1
@@ -659,11 +659,9 @@ def test_kneighbors_regressor_sparse(n_samples=40,


 def test_neighbors_iris():
-    """Sanity checks on the iris dataset
-
-    Puts three points of each label in the plane and performs a
-    nearest neighbor query on points near the decision boundary.
-    """
+    # Sanity checks on the iris dataset
+    # Puts three points of each label in the plane and performs a
+    # nearest neighbor query on points near the decision boundary.

     for algorithm in ALGORITHMS:
         clf = neighbors.KNeighborsClassifier(n_neighbors=1,
@@ -682,11 +680,9 @@ def test_neighbors_iris():


 def test_neighbors_digits():
-    """Sanity check on the digits dataset
-
-    the 'brute' algorithm has been observed to fail if the input
-    dtype is uint8 due to overflow in distance calculations.
-    """
+    # Sanity check on the digits dataset
+    # The 'brute' algorithm has been observed to fail if the input
+    # dtype is uint8 due to overflow in distance calculations.
     X = digits.data.astype('uint8')
     Y = digits.target
@@ -704,7 +700,7 @@ def test_neighbors_digits():


 def test_kneighbors_graph():
-    """Test kneighbors_graph to build the k-Nearest Neighbor graph."""
+    # Test kneighbors_graph to build the k-Nearest Neighbor graph.
     X = np.array([[0, 1], [1.01, 1.], [2, 0]])

     # n_neighbors = 1
@@ -741,8 +737,8 @@ def test_kneighbors_graph():


 def test_kneighbors_graph_sparse(seed=36):
-    """Test kneighbors_graph to build the k-Nearest Neighbor graph
-    for sparse input."""
+    # Test kneighbors_graph to build the k-Nearest Neighbor graph
+    # for sparse input.
     rng = np.random.RandomState(seed)
     X = rng.randn(10, 10)
     Xcsr = csr_matrix(X)
@@ -759,7 +755,7 @@ def test_kneighbors_graph_sparse(seed=36):


 def test_radius_neighbors_graph():
-    """Test radius_neighbors_graph to build the Nearest Neighbor graph."""
+    # Test radius_neighbors_graph to build the Nearest Neighbor graph.
     X = np.array([[0, 1], [1.01, 1.], [2, 0]])

     A = neighbors.radius_neighbors_graph(X, 1.5, mode='connectivity')
@@ -778,8 +774,8 @@ def test_radius_neighbors_graph():


 def test_radius_neighbors_graph_sparse(seed=36):
-    """Test radius_neighbors_graph to build the Nearest Neighbor graph
-    for sparse input."""
+    # Test radius_neighbors_graph to build the Nearest Neighbor graph
+    # for sparse input.
     rng = np.random.RandomState(seed)
     X = rng.randn(10, 10)
     Xcsr = csr_matrix(X)
@@ -796,7 +792,7 @@ def test_radius_neighbors_graph_sparse(seed=36):
 
 
 def test_neighbors_badargs():
-    """Test bad argument values: these should all raise ValueErrors"""
+    # Test bad argument values: these should all raise ValueErrors
     assert_raises(ValueError,
                   neighbors.NearestNeighbors,
                   algorithm='blah')
@@ -847,7 +843,7 @@ def test_neighbors_badargs():
 
 def test_neighbors_metrics(n_samples=20, n_features=3,
                            n_query_pts=2, n_neighbors=5):
-    """Test computing the neighbors for various metrics"""
+    # Test computing the neighbors for various metrics
     # create a symmetric matrix
     V = rng.rand(n_features, n_features)
     VI = np.dot(V, V.T)
@@ -966,7 +962,7 @@ def check_object_arrays(nparray, list_check):
 
 
 def test_k_and_radius_neighbors_train_is_not_query():
-    """Test kneighbors et.al when query is not training data"""
+    # Test kneighbors et al. when query is not training data
 
     for algorithm in ALGORITHMS:
 
@@ -995,7 +991,7 @@ def test_k_and_radius_neighbors_train_is_not_query():
 
 
 def test_k_and_radius_neighbors_X_None():
-    """Test kneighbors et.al when query is None"""
+    # Test kneighbors et al. when query is None
     for algorithm in ALGORITHMS:
 
         nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)
@@ -1027,7 +1023,7 @@ def test_k_and_radius_neighbors_X_None():
 
 
 def test_k_and_radius_neighbors_duplicates():
-    """Test behavior of kneighbors when duplicates are present in query"""
+    # Test behavior of kneighbors when duplicates are present in query
     for algorithm in ALGORITHMS:
         nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)
@@ -1074,7 +1070,7 @@ def test_k_and_radius_neighbors_duplicates():
 
 
 def test_include_self_neighbors_graph():
-    """Test include_self parameter in neighbors_graph"""
+    # Test include_self parameter in neighbors_graph
     X = [[2, 3], [4, 5]]
     kng = neighbors.kneighbors_graph(X, 1, include_self=True).A
     kng_not_self = neighbors.kneighbors_graph(X, 1, include_self=False).A
diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py
index 6041e246f1dcd..a735954fcb5be 100644
--- a/sklearn/neural_network/tests/test_rbm.py
+++ b/sklearn/neural_network/tests/test_rbm.py
@@ -59,7 +59,7 @@ def test_transform():
 
 
 def test_small_sparse():
-    """BernoulliRBM should work on small sparse matrices."""
+    # BernoulliRBM should work on small sparse matrices.
     X = csr_matrix(Xdigits[:4])
     BernoulliRBM().fit(X)  # no exception
@@ -96,10 +96,8 @@ def test_sample_hiddens():
 
 
 def test_fit_gibbs():
-    """
-    Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
-    from the same input
-    """
+    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
+    # from the same input
     rng = np.random.RandomState(42)
     X = np.array([[0.], [1.]])
     rbm1 = BernoulliRBM(n_components=2, batch_size=2,
@@ -113,10 +111,8 @@ def test_fit_gibbs():
 
 
 def test_fit_gibbs_sparse():
-    """
-    Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
-    the same input even when the input is sparse, and test against non-sparse
-    """
+    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
+    # the same input even when the input is sparse, and test against non-sparse
     rbm1 = test_fit_gibbs()
     rng = np.random.RandomState(42)
     from scipy.sparse import csc_matrix
@@ -131,8 +127,8 @@ def test_fit_gibbs_sparse():
 
 
 def test_gibbs_smoke():
-    """Check if we don't get NaNs sampling the full digits dataset.
-    Also check that sampling again will yield different results."""
+    # Check if we don't get NaNs sampling the full digits dataset.
+    # Also check that sampling again will yield different results.
     X = Xdigits
     rbm1 = BernoulliRBM(n_components=42, batch_size=40,
                         n_iter=20, random_state=42)
@@ -144,7 +140,7 @@ def test_gibbs_smoke():
 
 
 def test_score_samples():
-    """Test score_samples (pseudo-likelihood) method."""
+    # Test score_samples (pseudo-likelihood) method.
     # Assert that pseudo-likelihood is computed without clipping.
     # See Fabian's blog, http://bit.ly/1iYefRk
     rng = np.random.RandomState(42)
@@ -179,9 +175,7 @@ def test_rbm_verbose():
 
 
 def test_sparse_and_verbose():
-    """
-    Make sure RBM works with sparse input when verbose=True
-    """
+    # Make sure RBM works with sparse input when verbose=True
     old_stdout = sys.stdout
     sys.stdout = StringIO()
     from scipy.sparse import csc_matrix
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index d32fddb5071c5..8fd59edea8c88 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -40,7 +40,7 @@ def toarray(a):
 
 
 def test_polynomial_features():
-    """Test Polynomial Features"""
+    # Test Polynomial Features
     X1 = np.arange(6)[:, np.newaxis]
     P1 = np.hstack([np.ones_like(X1),
                     X1, X1 ** 2, X1 ** 3])
@@ -70,7 +70,7 @@ def test_polynomial_features():
 
 
 def test_scaler_1d():
-    """Test scaling of dataset along single axis"""
+    # Test scaling of dataset along single axis
     rng = np.random.RandomState(0)
     X = rng.randn(5)
     X_orig_copy = X.copy()
@@ -100,7 +100,7 @@ def test_scaler_1d():
 
 
 def test_scaler_2d_arrays():
-    """Test scaling of 2d array along first axis"""
+    # Test scaling of 2d array along first axis
     rng = np.random.RandomState(0)
     X = rng.randn(4, 5)
     X[:, 0] = 0.0  # first feature is always of zero
@@ -181,7 +181,7 @@ def test_min_max_scaler_iris():
 
 
 def test_min_max_scaler_zero_variance_features():
-    """Check min max scaler on toy data with zero variance features"""
+    # Check min max scaler on toy data with zero variance features
     X = [[0., 1., +0.5],
         [0., 1., -0.1],
         [0., 1., +1.1]]
@@ -216,7 +216,7 @@ def test_min_max_scaler_zero_variance_features():
 
 
 def test_min_max_scaler_1d():
-    """Test scaling of dataset along single axis"""
+    # Test scaling of dataset along single axis
    rng = np.random.RandomState(0)
    X = rng.randn(5)
    X_orig_copy = X.copy()
@@ -378,7 +378,7 @@ def test_scaler_int():
 
 
 def test_scaler_without_copy():
-    """Check that StandardScaler.fit does not change input"""
+    # Check that StandardScaler.fit does not change input
     rng = np.random.RandomState(42)
     X = rng.randn(4, 5)
     X[:, 0] = 0.0  # first feature is always of zero
@@ -411,7 +411,7 @@ def test_scale_sparse_with_mean_raise_exception():
 
 
 def test_scale_input_finiteness_validation():
-    """Check if non finite inputs raise ValueError"""
+    # Check if non finite inputs raise ValueError
     X = [np.nan, 5, 6, 7, 8]
     assert_raises_regex(ValueError,
                         "Input contains NaN, infinity or a value too large",
@@ -454,7 +454,7 @@ def test_scale_function_without_centering():
 
 
 def test_warning_scaling_integers():
-    """Check warning when scaling integer data"""
+    # Check warning when scaling integer data
     X = np.array([[1, 2, 0],
                   [0, 0, 0]], dtype=np.uint8)
@@ -564,7 +564,7 @@ def test_normalizer_l2():
 
 
 def test_normalize():
-    """Test normalize function"""
+    # Test normalize function
     # Only tests functionality not used by the tests for Normalizer.
     X = np.random.RandomState(37).randn(3, 2)
     assert_array_equal(normalize(X, copy=False),
@@ -622,8 +622,8 @@ def test_binarizer():
 
 
 def test_center_kernel():
-    """Test that KernelCenterer is equivalent to StandardScaler
-    in feature space"""
+    # Test that KernelCenterer is equivalent to StandardScaler
+    # in feature space
     rng = np.random.RandomState(0)
     X_fit = rng.random_sample((5, 4))
     scaler = StandardScaler(with_std=False)
@@ -683,7 +683,7 @@ def test_add_dummy_feature_csr():
 
 
 def test_one_hot_encoder_sparse():
-    """Test OneHotEncoder's fit and transform."""
+    # Test OneHotEncoder's fit and transform.
     X = [[3, 2, 1], [0, 1, 1]]
     enc = OneHotEncoder()
     # discover max values automatically
@@ -736,7 +736,7 @@ def test_one_hot_encoder_sparse():
     assert_raises(ValueError, enc.transform, [[0], [-1]])
 
 def test_one_hot_encoder_dense():
-    """check for sparse=False"""
+    # check for sparse=False
     X = [[3, 2, 1], [0, 1, 1]]
     enc = OneHotEncoder(sparse=False)
     # discover max values automatically
diff --git a/sklearn/preprocessing/tests/test_imputation.py b/sklearn/preprocessing/tests/test_imputation.py
index 608fdacee6e12..bfcfc2a753b6a 100644
--- a/sklearn/preprocessing/tests/test_imputation.py
+++ b/sklearn/preprocessing/tests/test_imputation.py
@@ -75,7 +75,7 @@ def _check_statistics(X, X_true,
 
 
 def test_imputation_shape():
-    """Verify the shapes of the imputed matrix for different strategies."""
+    # Verify the shapes of the imputed matrix for different strategies.
     X = np.random.randn(10, 2)
     X[::2] = np.nan
@@ -88,8 +88,8 @@ def test_imputation_shape():
 
 
 def test_imputation_mean_median_only_zero():
-    """Test imputation using the mean and median strategies, when
-    missing_values == 0."""
+    # Test imputation using the mean and median strategies, when
+    # missing_values == 0.
     X = np.array([
         [np.nan, 0, 0, 0, 5],
         [np.nan, 1, 0, np.nan, 3],
@@ -122,8 +122,8 @@ def test_imputation_mean_median_only_zero():
 
 
 def test_imputation_mean_median():
-    """Test imputation using the mean and median strategies, when
-    missing_values != 0."""
+    # Test imputation using the mean and median strategies, when
+    # missing_values != 0.
     rng = np.random.RandomState(0)
 
     dim = 10
@@ -192,8 +192,7 @@ def test_imputation_mean_median():
 
 
 def test_imputation_median_special_cases():
-    """Test median imputation with sparse boundary cases
-    """
+    # Test median imputation with sparse boundary cases
     X = np.array([
         [0, np.nan, np.nan],  # odd: implicit zero
         [5, np.nan, np.nan],  # odd: explicit nonzero
@@ -222,7 +221,7 @@ def test_imputation_median_special_cases():
 
 
 def test_imputation_most_frequent():
-    """Test imputation using the most-frequent strategy."""
+    # Test imputation using the most-frequent strategy.
     X = np.array([
         [-1, -1, 0, 5],
         [-1, 2, -1, 3],
@@ -245,7 +244,7 @@ def test_imputation_most_frequent():
 
 
 def test_imputation_pipeline_grid_search():
-    """Test imputation within a pipeline + gridsearch."""
+    # Test imputation within a pipeline + gridsearch.
     pipeline = Pipeline([('imputer', Imputer(missing_values=0)),
                          ('tree', tree.DecisionTreeRegressor(random_state=0))])
@@ -262,7 +261,7 @@ def test_imputation_pipeline_grid_search():
 
 
 def test_imputation_pickle():
-    """Test for pickling imputers."""
+    # Test for pickling imputers.
     import pickle
 
     l = 100
@@ -281,7 +280,7 @@ def test_imputation_pickle():
 
 
 def test_imputation_copy():
-    """Test imputation with copy"""
+    # Test imputation with copy
     X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0)
 
     # copy=True, dense => copy
diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index 77629d140f422..0f3fda7adec2e 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -152,7 +152,7 @@ def test_label_binarizer_set_label_encoding():
 
 @ignore_warnings
 def test_label_binarizer_errors():
-    """Check that invalid arguments yield ValueError"""
+    # Check that invalid arguments yield ValueError
     one_class = np.array([0, 0, 0, 0])
     lb = LabelBinarizer().fit(one_class)
@@ -191,7 +191,7 @@ def test_label_binarizer_errors():
 
 
 def test_label_encoder():
-    """Test LabelEncoder's transform and inverse_transform methods"""
+    # Test LabelEncoder's transform and inverse_transform methods
     le = LabelEncoder()
     le.fit([1, 1, 4, 5, -1, 0])
     assert_array_equal(le.classes_, [-1, 0, 1, 4, 5])
@@ -203,7 +203,7 @@ def test_label_encoder():
 
 
 def test_label_encoder_fit_transform():
-    """Test fit_transform"""
+    # Test fit_transform
     le = LabelEncoder()
     ret = le.fit_transform([1, 1, 4, 5, -1, 0])
     assert_array_equal(ret, [2, 2, 3, 4, 0, 1])
@@ -214,7 +214,7 @@ def test_label_encoder_fit_transform():
 
 
 def test_label_encoder_errors():
-    """Check that invalid arguments yield ValueError"""
+    # Check that invalid arguments yield ValueError
     le = LabelEncoder()
     assert_raises(ValueError, le.transform, [])
     assert_raises(ValueError, le.inverse_transform, [])
@@ -333,8 +333,7 @@ def test_multilabel_binarizer_given_classes():
 
 
 def test_multilabel_binarizer_same_length_sequence():
-    """Ensure sequences of the same length are not interpreted as a 2-d array
-    """
+    # Ensure sequences of the same length are not interpreted as a 2-d array
     inp = [[1], [0], [2]]
     indicator_mat = np.array([[0, 1, 0],
                               [1, 0, 0],
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index d1b93d5299da2..eff744241ccdd 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -39,7 +39,7 @@
 
 
 def test_svc():
-    """Check that sparse SVC gives the same result as SVC"""
+    # Check that sparse SVC gives the same result as SVC
     clf = svm.SVC(kernel='linear', probability=True, random_state=0)
     clf.fit(X, Y)
@@ -114,7 +114,7 @@ def test_svc_with_custom_kernel():
 
 
 def test_svc_iris():
-    """Test the sparse SVC with the iris dataset"""
+    # Test the sparse SVC with the iris dataset
     for k in ('linear', 'poly', 'rbf'):
         sp_clf = svm.SVC(kernel=k).fit(iris.data, iris.target)
         clf = svm.SVC(kernel=k).fit(iris.data.toarray(), iris.target)
@@ -129,9 +129,7 @@ def test_svc_iris():
 
 
 def test_error():
-    """
-    Test that it gives proper exception on deficient input
-    """
+    # Test that it gives proper exception on deficient input
     # impossible value of C
     assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)
@@ -148,9 +146,7 @@ def test_error():
 
 
 def test_linearsvc():
-    """
-    Similar to test_SVC
-    """
+    # Similar to test_SVC
     clf = svm.LinearSVC(random_state=0).fit(X, Y)
     sp_clf = svm.LinearSVC(random_state=0).fit(X_sp, Y)
@@ -169,7 +165,7 @@ def test_linearsvc():
 
 
 def test_linearsvc_iris():
-    """Test the sparse LinearSVC with the iris dataset"""
+    # Test the sparse LinearSVC with the iris dataset
     sp_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
     clf = svm.LinearSVC(random_state=0).fit(iris.data.toarray(), iris.target)
@@ -194,9 +190,7 @@ def test_linearsvc_iris():
 
 
 def test_weight():
-    """
-    Test class weights
-    """
+    # Test class weights
 
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  weights=[0.833, 0.167], random_state=0)
@@ -211,9 +205,7 @@ def test_weight():
 
 
 def test_sample_weights():
-    """
-    Test weights on individual samples
-    """
+    # Test weights on individual samples
     clf = svm.SVC()
     clf.fit(X_sp, Y)
     assert_array_equal(clf.predict(X[2]), [1.])
@@ -224,19 +216,14 @@ def test_sample_weights():
 
 
 def test_sparse_liblinear_intercept_handling():
-    """
-    Test that sparse liblinear honours intercept_scaling param
-    """
+    # Test that sparse liblinear honours intercept_scaling param
     test_svm.test_dense_liblinear_intercept_handling(svm.LinearSVC)
 
 
 def test_sparse_realdata():
-    """
-    Test on a subset from the 20newsgroups dataset.
-
-    This catchs some bugs if input is not correctly converted into
-    sparse format or weights are not correctly initialized.
-    """
+    # Test on a subset from the 20newsgroups dataset.
+    # This catches some bugs if input is not correctly converted into
+    # sparse format or weights are not correctly initialized.
 
     data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
     indices = np.array([6, 5, 35, 31])
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 65fcbee63a2cd..08cb2d5c9ca83 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -37,9 +37,7 @@
 
 
 def test_libsvm_parameters():
-    """
-    Test parameters on classes that make use of libsvm.
-    """
+    # Test parameters on classes that make use of libsvm.
     clf = svm.SVC(kernel='linear').fit(X, Y)
     assert_array_equal(clf.dual_coef_, [[-0.25, .25]])
     assert_array_equal(clf.support_, [1, 3])
@@ -49,7 +47,7 @@ def test_libsvm_parameters():
 
 
 def test_libsvm_iris():
-    """Check consistency on dataset iris."""
+    # Check consistency on dataset iris.
 
     # shuffle the dataset so that labels are not ordered
     for k in ('linear', 'rbf'):
@@ -85,9 +83,7 @@ def test_libsvm_iris():
 
 
 def test_single_sample_1d():
-    """
-    Test whether SVCs work on a single sample given as a 1-d array
-    """
+    # Test whether SVCs work on a single sample given as a 1-d array
 
     clf = svm.SVC().fit(X, Y)
     clf.predict(X[0])
@@ -97,11 +93,8 @@ def test_single_sample_1d():
 
 
 def test_precomputed():
-    """
-    SVC with a precomputed kernel.
-
-    We test it with a toy dataset and with iris.
-    """
+    # SVC with a precomputed kernel.
+    # We test it with a toy dataset and with iris.
     clf = svm.SVC(kernel='precomputed')
     # Gram matrix for train data (square matrix)
     # (we use just a linear kernel)
@@ -170,9 +163,7 @@ def test_precomputed():
 
 
 def test_svr():
-    """
-    Test Support Vector Regression
-    """
+    # Test Support Vector Regression
 
     diabetes = datasets.load_diabetes()
     for clf in (svm.NuSVR(kernel='linear', nu=.4, C=1.0),
@@ -215,9 +206,7 @@ def test_svr_errors():
 
 
 def test_oneclass():
-    """
-    Test OneClassSVM
-    """
+    # Test OneClassSVM
     clf = svm.OneClassSVM()
     clf.fit(X)
     pred = clf.predict(T)
@@ -231,9 +220,7 @@ def test_oneclass():
 
 
 def test_oneclass_decision_function():
-    """
-    Test OneClassSVM decision function
-    """
+    # Test OneClassSVM decision function
 
     clf = svm.OneClassSVM()
     rnd = check_random_state(2)
@@ -263,16 +250,12 @@ def test_oneclass_decision_function():
 
 
 def test_tweak_params():
-    """
-    Make sure some tweaking of parameters works.
-
-    We change clf.dual_coef_ at run time and expect .predict() to change
-    accordingly. Notice that this is not trivial since it involves a lot
-    of C/Python copying in the libsvm bindings.
-
-    The success of this test ensures that the mapping between libsvm and
-    the python classifier is complete.
-    """
+    # Make sure some tweaking of parameters works.
+    # We change clf.dual_coef_ at run time and expect .predict() to change
+    # accordingly. Notice that this is not trivial since it involves a lot
+    # of C/Python copying in the libsvm bindings.
+    # The success of this test ensures that the mapping between libsvm and
+    # the python classifier is complete.
     clf = svm.SVC(kernel='linear', C=1.0)
     clf.fit(X, Y)
     assert_array_equal(clf.dual_coef_, [[-.25, .25]])
@@ -282,11 +265,8 @@
 
 
 def test_probability():
-    """
-    Predict probabilities using SVC
-
-    This uses cross validation, so we use a slightly bigger testing set.
-    """
+    # Predict probabilities using SVC
+    # This uses cross validation, so we use a slightly bigger testing set.
 
     for clf in (svm.SVC(probability=True, random_state=0, C=1.0),
                 svm.NuSVC(probability=True, random_state=0)):
@@ -303,14 +283,10 @@
                               np.exp(clf.predict_log_proba(iris.data)), 8)
 
 
-def test_svc_decision_function():
-    """
-    Test SVC's decision_function
-
-    Sanity check, test that decision_function implemented in python
-    returns the same as the one in libsvm
-
-    """
+def test_decision_function():
+    # Test decision_function
+    # Sanity check, test that decision_function implemented in python
+    # returns the same as the one in libsvm
     # multi class:
     clf = svm.SVC(kernel='linear', C=0.1).fit(iris.data, iris.target)
@@ -339,13 +315,9 @@
 
 
 def test_svr_decision_function():
-    """
-    Test SVR's decision_function
-
-    Sanity check, test that decision_function implemented in python
-    returns the same as the one in libsvm
-
-    """
+    # Test SVR's decision_function
+    # Sanity check, test that decision_function implemented in python
+    # returns the same as the one in libsvm
 
     X = iris.data
     y = iris.target
@@ -365,9 +337,7 @@
 
 
 def test_weight():
-    """
-    Test class weights
-    """
+    # Test class weights
     clf = svm.SVC(class_weight={1: 0.1})
     # we give a small weights to class 1
     clf.fit(X, Y)
@@ -386,9 +356,7 @@
 
 
 def test_sample_weights():
-    """
-    Test weights on individual samples
-    """
+    # Test weights on individual samples
     # TODO: check on NuSVR, OneClass, etc.
     clf = svm.SVC()
     clf.fit(X, Y)
@@ -408,7 +376,7 @@ def test_sample_weights():
 
 
 def test_auto_weight():
-    """Test class weights for imbalanced data"""
+    # Test class weights for imbalanced data
     from sklearn.linear_model import LogisticRegression
     # We take as dataset the two-dimensional projection of iris so
     # that it is not separable and remove half of predictors from
@@ -435,9 +403,7 @@ def test_auto_weight():
 
 
 def test_bad_input():
-    """
-    Test that it gives proper exception on deficient input
-    """
+    # Test that it gives proper exception on deficient input
     # impossible value of C
     assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)
@@ -491,9 +457,7 @@ def test_sparse_precomputed():
 
 
 def test_linearsvc_parameters():
-    """
-    Test possible parameter combinations in LinearSVC
-    """
+    # Test possible parameter combinations in LinearSVC
     # Generate list of possible parameter combinations
     losses = ['hinge', 'squared_hinge', 'logistic_regression', 'foo']
     penalties, duals = ['l1', 'l2', 'bar'], [True, False]
@@ -582,9 +546,7 @@ def test_linear_svx_uppercase_loss_penalty():
 
 
 def test_linearsvc():
-    """
-    Test basic routines using LinearSVC
-    """
+    # Test basic routines using LinearSVC
     clf = svm.LinearSVC(random_state=0).fit(X, Y)
 
     # by default should have intercept
@@ -613,7 +575,7 @@ def test_linearsvc():
 
 
 def test_linearsvc_crammer_singer():
-    """Test LinearSVC with crammer_singer multi-class svm"""
+    # Test LinearSVC with crammer_singer multi-class svm
     ovr_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
     cs_clf = svm.LinearSVC(multi_class='crammer_singer', random_state=0)
     cs_clf.fit(iris.data, iris.target)
@@ -633,7 +595,7 @@ def test_linearsvc_crammer_singer():
 
 
 def test_crammer_singer_binary():
-    """Test Crammer-Singer formulation in the binary case"""
+    # Test Crammer-Singer formulation in the binary case
     X, y = make_classification(n_classes=2, random_state=0)
 
     for fit_intercept in (True, False):
@@ -645,11 +607,8 @@ def test_crammer_singer_binary():
 
 
 def test_linearsvc_iris():
-    """
-    Test that LinearSVC gives plausible predictions on the iris dataset
-
-    Also, test symbolic class names (classes_).
-    """
+    # Test that LinearSVC gives plausible predictions on the iris dataset
+    # Also, test symbolic class names (classes_).
     target = iris.target_names[iris.target]
     clf = svm.LinearSVC(random_state=0).fit(iris.data, target)
     assert_equal(set(clf.classes_), set(iris.target_names))
@@ -661,9 +620,7 @@ def test_linearsvc_iris():
 
 
 def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC):
-    """
-    Test that dense liblinear honours intercept_scaling param
-    """
+    # Test that dense liblinear honours intercept_scaling param
     X = [[2, 1],
          [3, 1],
          [1, 3],
@@ -720,7 +677,7 @@ def test_liblinear_set_coef():
 
 
 def test_immutable_coef_property():
-    """Check that primal coef modification are not silently ignored"""
+    # Check that primal coef modifications are not silently ignored
     svms = [
         svm.SVC(kernel='linear').fit(iris.data, iris.target),
         svm.NuSVC(kernel='linear').fit(iris.data, iris.target),
@@ -819,7 +776,7 @@ def test_consistent_proba():
 
 
 def test_linear_svc_convergence_warnings():
-    """Test that warnings are raised if model does not converge"""
+    # Test that warnings are raised if model does not converge
 
     lsvc = svm.LinearSVC(max_iter=2, verbose=1)
     assert_warns(ConvergenceWarning, lsvc.fit, X, Y)
@@ -827,7 +784,7 @@ def test_linear_svc_convergence_warnings():
 
 
 def test_svr_coef_sign():
-    """Test that SVR(kernel="linear") has coef_ with the right sign."""
+    # Test that SVR(kernel="linear") has coef_ with the right sign.
     # Non-regression test for #2933.
     X = np.random.RandomState(21).randn(10, 3)
     y = np.random.RandomState(12).randn(10)
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index d0cd0050fd8f8..59ad4e6ab5af6 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -1,4 +1,3 @@
-
 # Author: Gael Varoquaux
 # License: BSD 3 clause
 
@@ -81,13 +80,11 @@ def __init__(self, *vargs):
 # The tests
 
 def test_clone():
-    """Tests that clone creates a correct deep copy.
-
-    We create an estimator, make a copy of its original state
-    (which, in this case, is the current state of the estimator),
-    and check that the obtained copy is a correct deep copy.
+    # Tests that clone creates a correct deep copy.
+    # We create an estimator, make a copy of its original state
+    # (which, in this case, is the current state of the estimator),
+    # and check that the obtained copy is a correct deep copy.
 
-    """
     from sklearn.feature_selection import SelectFpr, f_classif
 
     selector = SelectFpr(f_classif, alpha=0.1)
@@ -101,12 +98,11 @@ def test_clone():
 
 
 def test_clone_2():
-    """Tests that clone doesn't copy everything.
+    # Tests that clone doesn't copy everything.
+    # We first create an estimator, give it an own attribute, and
+    # make a copy of its original state. Then we check that the copy doesn't
+    # have the specific attribute we manually added to the initial estimator.
 
-    We first create an estimator, give it an own attribute, and
-    make a copy of its original state. Then we check that the copy doesn't
-    have the specific attribute we manually added to the initial estimator.
-    """
     from sklearn.feature_selection import SelectFpr, f_classif
 
     selector = SelectFpr(f_classif, alpha=0.1)
@@ -116,7 +112,7 @@ def test_clone_2():
 
 
 def test_clone_buggy():
-    """Check that clone raises an error on buggy estimators."""
+    # Check that clone raises an error on buggy estimators.
     buggy = Buggy()
     buggy.a = 2
     assert_raises(RuntimeError, clone, buggy)
@@ -129,7 +125,7 @@ def test_clone_buggy():
 
 
 def test_clone_empty_array():
-    """Regression test for cloning estimators with empty arrays"""
+    # Regression test for cloning estimators with empty arrays
     clf = MyEstimator(empty=np.array([]))
     clf2 = clone(clf)
     assert_array_equal(clf.empty, clf2.empty)
@@ -140,7 +136,7 @@ def test_clone_empty_array():
 
 
 def test_repr():
-    """Smoke test the repr of the base estimator."""
+    # Smoke test the repr of the base estimator.
     my_estimator = MyEstimator()
     repr(my_estimator)
     test = T(K(), K())
@@ -154,7 +150,7 @@ def test_repr():
 
 
 def test_str():
-    """Smoke test the str of the base estimator"""
+    # Smoke test the str of the base estimator
     my_estimator = MyEstimator()
     str(my_estimator)
@@ -200,9 +196,9 @@ def test_set_params():
     # non-existing parameter of pipeline
     assert_raises(ValueError, clf.set_params, svm__stupid_param=True)
     # we don't currently catch if the things in pipeline are estimators
-    #bad_pipeline = Pipeline([("bad", NoEstimator())])
-    #assert_raises(AttributeError, bad_pipeline.set_params,
-    #              bad__stupid_param=True)
+    # bad_pipeline = Pipeline([("bad", NoEstimator())])
+    # assert_raises(AttributeError, bad_pipeline.set_params,
+    #               bad__stupid_param=True)
 
 
 def test_score_sample_weight():
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 043206323ff7d..3c25714857d37 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -248,7 +248,7 @@ def test_class_weight_classifiers():
 
 
 def test_class_weight_auto_classifiers():
-    """Test that class_weight="auto" improves f1-score"""
+    # Test that class_weight="auto" improves f1-score
 
     # This test is broken; its success depends on:
     # * a rare fortuitous RNG seed for make_classification; and
diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py
index 94946eff9f4fd..786bf561ec5e2 100644
--- a/sklearn/tests/test_cross_validation.py
+++ b/sklearn/tests/test_cross_validation.py
@@ -465,7 +465,7 @@ def assert_counts_are_ok(idx_counts, p):
 
 
 def test_predefinedsplit_with_kfold_split():
-    '''Check that PredefinedSplit can reproduce a split generated by Kfold.'''
+    # Check that PredefinedSplit can reproduce a split generated by Kfold.
     folds = -1 * np.ones(10)
     kf_train = []
     kf_test = []
@@ -484,8 +484,8 @@ def test_predefinedsplit_with_kfold_split():
 
 
 def test_leave_label_out_changing_labels():
-    """Check that LeaveOneLabelOut and LeavePLabelOut work normally if
-    the labels variable is changed before calling __iter__"""
+    # Check that LeaveOneLabelOut and LeavePLabelOut work normally if
+    # the labels variable is changed before calling __iter__
     labels = np.array([0, 1, 2, 1, 1, 2, 0, 0])
     labels_changing = np.array(labels, copy=True)
     lolo = cval.LeaveOneLabelOut(labels)
@@ -605,9 +605,9 @@ def test_cross_val_score_fit_params():
 DUMMY_OBJ = object()
 
 def assert_fit_params(clf):
-    """Function to test that the values are passed correctly to the
-    classifier arguments for non-array type
-    """
+    # Function to test that the values are passed correctly to the
+    # classifier arguments for non-array type
+
     assert_equal(clf.dummy_int, DUMMY_INT)
     assert_equal(clf.dummy_str, DUMMY_STR)
     assert_equal(clf.dummy_obj, DUMMY_OBJ)
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index 8fe2bdc6279c6..d9038187dfff2 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -93,7 +93,7 @@ def score(self):
 
 
 def test_parameter_grid():
-    """Test basic properties of ParameterGrid."""
+    # Test basic properties of ParameterGrid.
     params1 = {"foo": [1, 2, 3]}
     grid1 = ParameterGrid(params1)
     assert_true(isinstance(grid1, Iterable))
@@ -124,7 +124,7 @@ def test_parameter_grid():
 
 
 def test_grid_search():
-    """Test that the best estimator contains the right value for foo_param"""
+    # Test that the best estimator contains the right value for foo_param
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, verbose=3)
     # make sure it selects the smallest parameter in case of ties
@@ -205,10 +205,8 @@ def test_grid_search_score_method():
 
 
 def test_trivial_grid_scores():
-    """Test search over a "grid" with only one point.
-
-    Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
-    """
+    # Test search over a "grid" with only one point.
+    # Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1]})
     grid_search.fit(X, y)
@@ -220,7 +218,7 @@ def test_trivial_grid_scores():
 
 
 def test_no_refit():
-    """Test that grid search can be used for model selection only"""
+    # Test that grid search can be used for model selection only
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=False)
     grid_search.fit(X, y)
@@ -228,8 +226,8 @@ def test_no_refit():
 
 
 def test_grid_search_error():
-    """Test that grid search will capture errors on data with different
-    length"""
+    # Test that grid search will capture errors on data with different
+    # length
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
     clf = LinearSVC()
@@ -305,7 +303,7 @@ def test_grid_search_bad_param_grid():
 
 
 def test_grid_search_sparse():
-    """Test that grid search works with both dense and sparse matrices"""
+    # Test that grid search works with both dense and sparse matrices
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
     clf = LinearSVC()
@@ -344,8 +342,8 @@ def test_grid_search_sparse_scoring():
     assert_array_equal(y_pred, y_pred2)
     assert_equal(C, C2)
     # Smoke test the score
-    #np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
-    #                           cv.score(X_[:180], y[:180]))
+    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
+    #                            cv.score(X_[:180], y[:180]))
 
     # test loss where greater is worse
     def f1_loss(y_true_, y_pred_):
@@ -361,8 +359,8 @@ def f1_loss(y_true_, y_pred_):
 
 
 def test_grid_search_precomputed_kernel():
-    """Test that grid search works when the input features are given in the
-    form of a precomputed kernel matrix """
+    # Test that grid search works when the input features are given in the
+    # form of a precomputed kernel matrix
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
 
     # compute the training kernel matrix corresponding to the linear kernel
@@ -389,8 +387,8 @@ def test_grid_search_precomputed_kernel():
 
 
 def test_grid_search_precomputed_kernel_error_nonsquare():
-    """Test that grid search returns an error with a non-square precomputed
-    training kernel matrix"""
+    # Test that grid search returns an error with a non-square precomputed
+    # training kernel matrix
     K_train = np.zeros((10, 20))
     y_train = np.ones((10, ))
     clf = SVC(kernel='precomputed')
@@ -399,7 +397,7 @@ def test_grid_search_precomputed_kernel_error_nonsquare():
 
 
 def test_grid_search_precomputed_kernel_error_kernel_function():
-    """Test that grid search returns an error when using a kernel_function"""
+    # Test that grid search returns an error when using a kernel_function
     X_, y_ = make_classification(n_samples=200, n_features=100,
                                  random_state=0)
     kernel_function = lambda x1, x2: np.dot(x1, x2.T)
     clf = SVC(kernel=kernel_function)
@@ -422,11 +420,9 @@ def predict(self, X):
 
 
 def test_refit():
-    """Regression test for bug in refitting
-
-    Simulates re-fitting a broken estimator; this used to break with
-    sparse SVMs.
-    """
+    # Regression test for bug in refitting
+    # Simulates re-fitting a broken estimator; this used to break with
+    # sparse SVMs.
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
@@ -436,7 +432,7 @@ def test_refit():
 
 
 def test_gridsearch_nd():
-    """Pass X as list in GridSearchCV"""
+    # Pass X as list in GridSearchCV
     X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
     y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
     check_X = lambda x: x.shape[1:] == (5, 3, 2)
@@ -448,7 +444,7 @@ def test_gridsearch_nd():
 
 
 def test_X_as_list():
-    """Pass X as list in GridSearchCV"""
+    # Pass X as list in GridSearchCV
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
@@ -460,7 +456,7 @@ def test_X_as_list():
 
 
 def test_y_as_list():
-    """Pass y as list in GridSearchCV"""
+    # Pass y as list in GridSearchCV
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
@@ -608,7 +604,7 @@ def test_grid_search_score_consistency():
 
 
 def test_pickle():
-    """Test that a fit search can be pickled"""
+    # Test that a fit search can be pickled
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
     grid_search.fit(X, y)
@@ -621,7 +617,7 @@ def test_pickle():
 
 
 def test_grid_search_with_multioutput_data():
-    """ Test search with multi-output estimator"""
+    # Test search with multi-output estimator
 
     X, y = make_multilabel_classification(return_indicator=True,
                                           random_state=0)
@@ -647,7 +643,8 @@ def test_grid_search_with_multioutput_data():
     # Test with a randomized search
     for est in estimators:
-        random_search = RandomizedSearchCV(est, est_parameters, cv=cv, n_iter=3)
+        random_search = RandomizedSearchCV(est, est_parameters,
+                                           cv=cv, n_iter=3)
         random_search.fit(X, y)
         for parameters, _, cv_validation_scores in random_search.grid_scores_:
             est.set_params(**parameters)
@@ -660,7 +657,7 @@ def test_grid_search_with_multioutput_data():
 
 
 def test_predict_proba_disabled():
-    """Test predict_proba when disabled on estimator."""
+    # Test predict_proba when disabled on estimator.
     X = np.arange(20).reshape(5, -1)
     y = [0, 0, 1, 1, 1]
     clf = SVC(probability=False)
@@ -669,7 +666,7 @@ def test_predict_proba_disabled():
 
 
 def test_grid_search_allows_nans():
-    """ Test GridSearchCV with Imputer """
+    # Test GridSearchCV with Imputer
     X = np.arange(20, dtype=np.float64).reshape(5, -1)
     X[2, :] = np.nan
     y = [0, 0, 1, 1, 1]
@@ -697,10 +694,8 @@ def predict(self, X):
 
 
 def test_grid_search_failing_classifier():
-    """GridSearchCV with on_error != 'raise'
-
-    Ensures that a warning is raised and score reset where appropriate.
-    """
+    # GridSearchCV with on_error != 'raise'
+    # Ensures that a warning is raised and score reset where appropriate.
     X, y = make_classification(n_samples=20, n_features=10,
                                random_state=0)
@@ -733,7 +728,7 @@ def test_grid_search_failing_classifier():
 
 
 def test_grid_search_failing_classifier_raise():
-    """GridSearchCV with on_error == 'raise' raises the error"""
+    # GridSearchCV with on_error == 'raise' raises the error
 
     X, y = make_classification(n_samples=20, n_features=10,
                                random_state=0)
@@ -764,7 +759,8 @@ def test_parameters_sampler_replacement():
     sampler = ParameterSampler(params, n_iter=99, random_state=42)
     samples = list(sampler)
     assert_equal(len(samples), 99)
-    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c']) for p in samples]
+    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
+                        for p in samples]
     assert_equal(len(set(hashable_samples)), 99)
 
     # doesn't go into infinite loops
diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py
index a7df69ee77b30..2c7d3b6b62e21 100644
--- a/sklearn/tests/test_init.py
+++ b/sklearn/tests/test_init.py
@@ -14,9 +14,7 @@
 
 
 def test_import_skl():
-    """Test either above import has failed for some reason
-
-    "import *" is discouraged outside of the module level, hence we
-    rely on setting up the variable above
-    """
+    # Test whether the above import has failed for some reason
+    # "import *" is discouraged outside of the module level, hence we
+    # rely on setting up the variable above
     assert_equal(_top_import_error, None)
diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py
index e24eda736e5e8..8469d67a1672a 100644
--- a/sklearn/tests/test_kernel_approximation.py
+++ b/sklearn/tests/test_kernel_approximation.py
@@ -22,7 +22,7 @@
 
 
 def test_additive_chi2_sampler():
-    """test that AdditiveChi2Sampler approximates kernel on random data"""
+    # test that AdditiveChi2Sampler approximates kernel on random data
 
     # compute exact kernel
     # appreviations for easier formular
@@ -80,7 +80,7 @@ def test_additive_chi2_sampler():
 
 
 def test_skewed_chi2_sampler():
-    """test that RBFSampler approximates kernel on random data"""
+    # test that SkewedChi2Sampler approximates kernel on random data
 
     # compute exact kernel
     c = 0.03
@@ -111,7 +111,7 @@ def test_skewed_chi2_sampler():
 
 
 def test_rbf_sampler():
-    """test that RBFSampler approximates kernel on random data"""
+    # test that RBFSampler approximates kernel on random data
     # compute exact kernel
     gamma = 10.
     kernel = rbf_kernel(X, Y, gamma=gamma)
@@ -130,10 +130,8 @@ def test_rbf_sampler():
 
 
 def test_input_validation():
-    """Regression test: kernel approx. transformers should work on lists
-
-    No assertions; the old versions would simply crash
-    """
+    # Regression test: kernel approx. transformers should work on lists
+    # No assertions; the old versions would simply crash
     X = [[1, 2], [3, 4], [5, 6]]
     AdditiveChi2Sampler().fit(X).transform(X)
     SkewedChi2Sampler().fit(X).transform(X)
@@ -188,7 +186,7 @@ def test_nystroem_singular_kernel():
 
 
 def test_nystroem_poly_kernel_params():
-    """Non-regression: Nystroem should pass other parameters beside gamma."""
+    # Non-regression: Nystroem should pass other parameters beside gamma.
     rnd = np.random.RandomState(37)
     X = rnd.uniform(size=(10, 4))
@@ -200,7 +198,7 @@ def test_nystroem_poly_kernel_params():
 
 
 def test_nystroem_callable():
-    """Test Nystroem on a callable."""
+    # Test Nystroem on a callable.
     rnd = np.random.RandomState(42)
     n_samples = 10
     X = rnd.uniform(size=(n_samples, 4))
diff --git a/sklearn/tests/test_lda.py b/sklearn/tests/test_lda.py
index 336ceb043dcb8..8cc5faf1b3094 100644
--- a/sklearn/tests/test_lda.py
+++ b/sklearn/tests/test_lda.py
@@ -25,11 +25,9 @@
 
 
 def test_lda_predict():
-    """Test LDA classification.
-
-    This checks that LDA implements fit and predict and returns correct values
-    for simple toy data.
-    """
+    # Test LDA classification.
+    # This checks that LDA implements fit and predict and returns correct values
+    # for simple toy data.
     for test_case in solver_shrinkage:
         solver, shrinkage = test_case
         clf = lda.LDA(solver=solver, shrinkage=shrinkage)
@@ -66,8 +64,7 @@ def test_lda_predict():
 
 
 def test_lda_coefs():
-    """Test if the coefficients of the solvers are approximately the same.
-    """
+    # Test if the coefficients of the solvers are approximately the same.
     n_features = 2
     n_classes = 2
     n_samples = 1000
@@ -88,8 +85,7 @@ def test_lda_coefs():
 
 
 def test_lda_transform():
-    """Test LDA transform.
-    """
+    # Test LDA transform.
     clf = lda.LDA(solver="svd", n_components=1)
     X_transformed = clf.fit(X, y).transform(X)
     assert_equal(X_transformed.shape[1], 1)
@@ -132,8 +128,7 @@ def test_lda_orthogonality():
 
 
 def test_lda_scaling():
-    """Test if classification works correctly with differently scaled features.
-    """
+    # Test if classification works correctly with differently scaled features.
     n = 100
     rng = np.random.RandomState(1234)
     # use uniform distribution of features to make sure there is absolutely no
diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py
index c0cee613a874c..adc56a4fa749d 100644
--- a/sklearn/tests/test_metaestimators.py
+++ b/sklearn/tests/test_metaestimators.py
@@ -44,7 +44,7 @@ def __init__(self, name, construct, skip_methods=(),
 
 
 def test_metaestimator_delegation():
-    """Ensures specified metaestimators have methods iff subestimator does"""
+    # Ensures specified metaestimators have methods iff subestimator does
     def hides(method):
         @property
         def wrapper(obj):
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index dad7363dde599..b783355007d7c 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -121,7 +121,7 @@ def test_ovr_fit_predict_sparse():
 
 
 def test_ovr_always_present():
-    """Test that ovr works with classes that are always present or absent."""
+    # Test that ovr works with classes that are always present or absent.
     # Note: tests is the case where _ConstantPredictor is utilised
     X = np.ones((10, 2))
     X[:5, :] = 0
diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py
index 658130e3ec75f..0e180b461b01a 100644
--- a/sklearn/tests/test_naive_bayes.py
+++ b/sklearn/tests/test_naive_bayes.py
@@ -32,12 +32,9 @@
 
 
 def test_gnb():
-    """
-    Gaussian Naive Bayes classification.
-
-    This checks that GaussianNB implements fit and predict and returns
-    correct values for a simple toy dataset.
-    """
+    # Gaussian Naive Bayes classification.
+    # This checks that GaussianNB implements fit and predict and returns
+    # correct values for a simple toy dataset.
 
     clf = GaussianNB()
     y_pred = clf.fit(X, y).predict(X)
@@ -54,7 +51,7 @@ def test_gnb():
 
 
 def test_gnb_prior():
-    """Test whether class priors are properly set. """
+    # Test whether class priors are properly set.
     clf = GaussianNB().fit(X, y)
     assert_array_almost_equal(np.array([3, 3]) / 6.0,
                               clf.class_prior_, 8)
@@ -96,7 +93,7 @@ def test_gnb_sample_weight():
 
 
 def test_discrete_prior():
-    """Test whether class priors are properly set. """
+    # Test whether class priors are properly set.
     for cls in [BernoulliNB, MultinomialNB]:
         clf = cls().fit(X2, y2)
         assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
@@ -104,11 +101,9 @@ def test_discrete_prior():
 
 
 def test_mnnb():
-    """Test Multinomial Naive Bayes classification.
-
-    This checks that MultinomialNB implements fit and predict and returns
-    correct values for a simple toy dataset.
-    """
+    # Test Multinomial Naive Bayes classification.
+    # This checks that MultinomialNB implements fit and predict and returns
+    # correct values for a simple toy dataset.
 
     for X in [X2, scipy.sparse.csr_matrix(X2)]:
         # Check the ability to predict the learning set.
@@ -188,7 +183,7 @@ def test_gnb_partial_fit():
 
 
 def test_discretenb_pickle():
-    """Test picklability of discrete naive Bayes classifiers"""
+    # Test picklability of discrete naive Bayes classifiers
 
     for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
         clf = cls().fit(X2, y2)
@@ -213,7 +208,7 @@ def test_discretenb_pickle():
 
 
 def test_input_check_fit():
-    """Test input checks for the fit method"""
+    # Test input checks for the fit method
     for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
         # check shape consistency for number of samples at fit time
         assert_raises(ValueError, cls().fit, X2, y2[:-1])
@@ -246,7 +241,7 @@ def test_input_check_partial_fit():
 
 
 def test_discretenb_predict_proba():
-    """Test discrete NB classes' probability scores"""
+    # Test discrete NB classes' probability scores
 
     # The 100s below distinguish Bernoulli from multinomial.
     # FIXME: write a test to show this.
@@ -277,8 +272,8 @@ def test_discretenb_predict_proba():
 
 
 def test_discretenb_uniform_prior():
-    """Test whether discrete NB classes fit a uniform prior
-    when fit_prior=False and class_prior=None"""
+    # Test whether discrete NB classes fit a uniform prior
+    # when fit_prior=False and class_prior=None
 
     for cls in [BernoulliNB, MultinomialNB]:
         clf = cls()
@@ -289,7 +284,7 @@ def test_discretenb_uniform_prior():
 
 
 def test_discretenb_provide_prior():
-    """Test whether discrete NB classes use provided prior"""
+    # Test whether discrete NB classes use provided prior
 
     for cls in [BernoulliNB, MultinomialNB]:
         clf = cls(class_prior=[0.5, 0.5])
@@ -304,8 +299,8 @@ def test_discretenb_provide_prior():
 
 
 def test_discretenb_provide_prior_with_partial_fit():
-    """Test whether discrete NB classes use provided prior
-    when using partial_fit"""
+    # Test whether discrete NB classes use provided prior
+    # when using partial_fit
 
     iris = load_iris()
     iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
@@ -363,10 +358,8 @@ def test_sample_weight_mnb():
 
 
 def test_coef_intercept_shape():
-    """coef_ and intercept_ should have shapes as in other linear models.
-
-    Non-regression test for issue #2127.
-    """
+    # coef_ and intercept_ should have shapes as in other linear models.
+    # Non-regression test for issue #2127.
     X = [[1, 0, 0], [1, 1, 1]]
     y = [1, 2]  # binary classification
@@ -408,13 +401,11 @@ def test_check_accuracy_on_digits():
 
 
 def test_feature_log_prob_bnb():
-    """Test for issue #4268.
-
-    Tests that the feature log prob value computed by BernoulliNB when
-    alpha=1.0 is equal to the expression given in Manning, Raghavan,
-    and Schuetze's "Introduction to Information Retrieval" book:
-    http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
-    """
+    # Test for issue #4268.
+    # Tests that the feature log prob value computed by BernoulliNB when
+    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
+    # and Schuetze's "Introduction to Information Retrieval" book:
+    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
 
     X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
     Y = np.array([0, 0, 1, 2, 2])
@@ -433,12 +424,10 @@ def test_feature_log_prob_bnb():
 
 
 def test_bnb():
-    """
-    Tests that BernoulliNB when alpha=1.0 gives the same values as
-    those given for the toy example in Manning, Raghavan, and
-    Schuetze's "Introduction to Information Retrieval" book:
-    http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
-    """
+    # Tests that BernoulliNB when alpha=1.0 gives the same values as
+    # those given for the toy example in Manning, Raghavan, and
+    # Schuetze's "Introduction to Information Retrieval" book:
+    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
 
     # Training data points are:
     # Chinese Beijing Chinese (class: China)
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index c57ac01664a6a..a44e358b2493f 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -12,7 +12,7 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
 
-from sklearn.base import BaseEstimator, clone
+from sklearn.base import clone
 from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression
@@ -55,6 +55,7 @@ def set_params(self, **params):
         self.a = params['a']
         return self
 
+
 class TransfT(T):
 
     def transform(self, X, y=None):
@@ -77,8 +78,7 @@ def predict(self, X):
 
 
 def test_pipeline_init():
-    """ Test the various init parameters of the pipeline.
-    """
+    # Test the various init parameters of the pipeline.
     assert_raises(TypeError, Pipeline)
     # Check that we can't instantiate pipelines with objects without fit
     # method
@@ -129,8 +129,7 @@ def test_pipeline_init():
 
 
 def test_pipeline_methods_anova():
-    """ Test the various methods of the pipeline (anova).
-    """
+    # Test the various methods of the pipeline (anova).
     iris = load_iris()
     X = iris.data
     y = iris.target
@@ -146,8 +145,7 @@ def test_pipeline_methods_anova():
 
 
 def test_pipeline_fit_params():
-    """Test that the pipeline can take fit parameters
-    """
+    # Test that the pipeline can take fit parameters
     pipe = Pipeline([('transf', TransfT()), ('clf', FitParamT())])
     pipe.fit(X=None, y=None, clf__should_succeed=True)
     # classifier should return True
@@ -158,7 +156,7 @@ def test_pipeline_fit_params():
 
 
 def test_pipeline_methods_pca_svm():
-    """Test the various methods of the pipeline (pca + svm)."""
+    # Test the various methods of the pipeline (pca + svm).
     iris = load_iris()
     X = iris.data
     y = iris.target
@@ -174,7 +172,7 @@ def test_pipeline_methods_pca_svm():
 
 
 def test_pipeline_methods_preprocessing_svm():
-    """Test the various methods of the pipeline (preprocessing + svm)."""
+    # Test the various methods of the pipeline (preprocessing + svm).
     iris = load_iris()
     X = iris.data
     y = iris.target
diff --git a/sklearn/tests/test_qda.py b/sklearn/tests/test_qda.py
index fc59f05527154..949d28920b721 100644
--- a/sklearn/tests/test_qda.py
+++ b/sklearn/tests/test_qda.py
@@ -32,12 +32,9 @@
 
 
 def test_qda():
-    """
-    QDA classification.
-
-    This checks that QDA implements fit and predict and returns
-    correct values for a simple toy dataset.
-    """
+    # QDA classification.
+    # This checks that QDA implements fit and predict and returns
+    # correct values for a simple toy dataset.
     clf = qda.QDA()
     y_pred = clf.fit(X, y).predict(X)
     assert_array_equal(y_pred, y)
diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py
index 7b2fb8157ac3a..e64fed90ece16 100644
--- a/sklearn/tests/test_random_projection.py
+++ b/sklearn/tests/test_random_projection.py
@@ -114,24 +114,24 @@ def check_input_with_sparse_random_matrix(random_matrix):
 
 
 def test_basic_property_of_random_matrix():
-    """Check basic properties of random matrix generation"""
+    # Check basic properties of random matrix generation
     for random_matrix in all_random_matrix:
-        check_input_size_random_matrix(random_matrix)
-        check_size_generated(random_matrix)
-        check_zero_mean_and_unit_norm(random_matrix)
+        yield check_input_size_random_matrix, random_matrix
+        yield check_size_generated, random_matrix
+        yield check_zero_mean_and_unit_norm, random_matrix
 
     for random_matrix in all_sparse_random_matrix:
-        check_input_with_sparse_random_matrix(random_matrix)
+        yield check_input_with_sparse_random_matrix, random_matrix
 
         random_matrix_dense = \
             lambda n_components, n_features, random_state: random_matrix(
                 n_components, n_features, random_state=random_state,
                 density=1.0)
-        check_zero_mean_and_unit_norm(random_matrix_dense)
+        yield check_zero_mean_and_unit_norm, random_matrix_dense
 
 
 def test_gaussian_random_matrix():
-    """Check some statical properties of Gaussian random matrix"""
+    # Check some statistical properties of Gaussian random matrix
     # Check that the random matrix follow the proper distribution.
     # Let's say that each element of a_{ij} of A is taken from
     # a_ij ~ N(0.0, 1 / n_components).
@@ -145,7 +145,7 @@ def test_gaussian_random_matrix():
 
 
 def test_sparse_random_matrix():
-    """Check some statical properties of sparse random matrix"""
+    # Check some statistical properties of sparse random matrix
     n_components = 100
     n_features = 500
diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py
index 30af1bb8f3b08..c66e6e225891e 100644
--- a/sklearn/tree/tests/test_export.py
+++ b/sklearn/tree/tests/test_export.py
@@ -17,7 +17,7 @@
 
 
 def test_graphviz_toy():
-    """Check correctness of export_graphviz"""
+    # Check correctness of export_graphviz
     clf = DecisionTreeClassifier(max_depth=3,
                                  min_samples_split=1,
                                  criterion="gini",
@@ -75,7 +75,7 @@ def test_graphviz_toy():
 
 
 def test_graphviz_errors():
-    """Check for errors of export_graphviz"""
+    # Check for errors of export_graphviz
     clf = DecisionTreeClassifier(max_depth=3, min_samples_split=1)
     clf.fit(X, y)
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index c8f68b21bbe58..0891db43010cd 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -182,7 +182,7 @@ def assert_tree_equal(d, s, message):
 
 
 def test_classification_toy():
-    """Check classification on a toy dataset."""
+    # Check classification on a toy dataset.
     for name, Tree in CLF_TREES.items():
         clf = Tree(random_state=0)
         clf.fit(X, y)
@@ -196,7 +196,7 @@ def test_classification_toy():
 
 
 def test_weighted_classification_toy():
-    """Check classification on a weighted toy dataset."""
+    # Check classification on a weighted toy dataset.
 
     for name, Tree in CLF_TREES.items():
         clf = Tree(random_state=0)
@@ -210,7 +210,7 @@ def test_weighted_classification_toy():
 
 
 def test_regression_toy():
-    """Check regression on a toy dataset."""
+    # Check regression on a toy dataset.
     for name, Tree in REG_TREES.items():
         reg = Tree(random_state=1)
         reg.fit(X, y)
@@ -224,7 +224,7 @@ def test_regression_toy():
 
 
 def test_xor():
-    """Check on a XOR problem"""
+    # Check on a XOR problem
     y = np.zeros((10, 10))
     y[:5, :5] = 1
     y[5:, 5:] = 1
@@ -247,7 +247,7 @@ def test_xor():
 
 
 def test_iris():
-    """Check consistency on dataset iris."""
+    # Check consistency on dataset iris.
     for (name, Tree), criterion in product(CLF_TREES.items(), CLF_CRITERIONS):
         clf = Tree(criterion=criterion, random_state=0)
         clf.fit(iris.data, iris.target)
@@ -265,7 +265,7 @@ def test_iris():
 
 
 def test_boston():
-    """Check consistency on dataset boston house prices."""
+    # Check consistency on dataset boston house prices.
 
     for (name, Tree), criterion in product(REG_TREES.items(), REG_CRITERIONS):
         reg = Tree(criterion=criterion, random_state=0)
@@ -286,7 +286,7 @@ def test_boston():
 
 
 def test_probability():
-    """Predict probabilities using DecisionTreeClassifier."""
+    # Predict probabilities using DecisionTreeClassifier.
 
     for name, Tree in CLF_TREES.items():
         clf = Tree(max_depth=1, max_features=1, random_state=42)
@@ -305,7 +305,7 @@ def test_probability():
 
 
 def test_arrayrepr():
-    """Check the array representation."""
+    # Check the array representation.
     # Check resize
     X = np.arange(10000)[:, np.newaxis]
     y = np.arange(10000)
@@ -316,7 +316,7 @@ def test_arrayrepr():
 
 
 def test_pure_set():
-    """Check when y is pure."""
+    # Check when y is pure.
     X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
     y = [1, 1, 1, 1, 1, 1]
@@ -334,7 +334,7 @@ def test_pure_set():
 
 
 def test_numerical_stability():
-    """Check numerical stability."""
+    # Check numerical stability.
     X = np.array([
         [152.08097839, 140.40744019, 129.75102234, 159.90493774],
         [142.50700378, 135.81935120, 117.82884979, 162.75781250],
@@ -357,7 +357,7 @@ def test_numerical_stability():
 
 
 def test_importances():
-    """Check variable importances."""
+    # Check variable importances.
     X, y = datasets.make_classification(n_samples=2000,
                                         n_features=10,
                                         n_informative=3,
@@ -393,13 +393,13 @@ def test_importances():
 
 @raises(ValueError)
 def test_importances_raises():
-    """Check if variable importance before fit raises ValueError. """
+    # Check if variable importance before fit raises ValueError.
     clf = DecisionTreeClassifier()
     clf.feature_importances_
 
 
 def test_importances_gini_equal_mse():
-    """Check that gini is equivalent to mse for binary output variable"""
+    # Check that gini is equivalent to mse for binary output variable
 
     X, y = datasets.make_classification(n_samples=2000,
                                         n_features=10,
@@ -425,7 +425,7 @@ def test_importances_gini_equal_mse():
 
 
 def test_max_features():
-    """Check max_features."""
+    # Check max_features.
     for name, TreeRegressor in REG_TREES.items():
         reg = TreeRegressor(max_features="auto")
         reg.fit(boston.data, boston.target)
@@ -490,7 +490,7 @@ def test_max_features():
 
 
 def test_error():
-    """Test that it gives proper exception on deficient input."""
+    # Test that it gives proper exception on deficient input.
     for name, TreeEstimator in CLF_TREES.items():
         # predict before fit
         est = TreeEstimator()
@@ -547,7 +547,7 @@ def test_error():
 
 
 def test_min_samples_leaf():
-    """Test if leaves contain more than leaf_count training examples"""
+    # Test if leaves contain more than leaf_count training examples
     X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE))
     y = iris.target
@@ -617,7 +617,7 @@ def test_min_weight_fraction_leaf():
 
 
 def test_pickle():
-    """Check that tree estimator are pickable """
+    # Check that tree estimators are picklable
     for name, TreeClassifier in CLF_TREES.items():
         clf = TreeClassifier(random_state=0)
         clf.fit(iris.data, iris.target)
@@ -646,7 +646,7 @@ def test_pickle():
 
 
 def test_multioutput():
-    """Check estimators on multi-output problems."""
+    # Check estimators on multi-output problems.
     X = [[-2, -1],
          [-1, -1],
         [-1, -2],
@@ -702,7 +702,7 @@ def test_multioutput():
 
 
 def test_classes_shape():
-    """Test that n_classes_ and classes_ have proper shape."""
+    # Test that n_classes_ and classes_ have proper shape.
     for name, TreeClassifier in CLF_TREES.items():
         # Classification, single output
         clf = TreeClassifier(random_state=0)
@@ -722,7 +722,7 @@ def test_classes_shape():
 
 
 def test_unbalanced_iris():
-    """Check class rebalancing."""
+    # Check class rebalancing.
     unbalanced_X = iris.data[:125]
     unbalanced_y = iris.target[:125]
     sample_weight = _balance_weights(unbalanced_y)
@@ -734,7 +734,7 @@ def test_unbalanced_iris():
 
 
 def test_memory_layout():
-    """Check that it works no matter the memory layout"""
+    # Check that it works no matter the memory layout
     for (name, TreeEstimator), dtype in product(ALL_TREES.items(),
                                                 [np.float64, np.float32]):
         est = TreeEstimator(random_state=0)
@@ -777,7 +777,7 @@ def test_memory_layout():
 
 
 def test_sample_weight():
-    """Check sample weighting."""
+    # Check sample weighting.
     # Test that zero-weighted samples are not taken into account
     X = np.arange(100)[:, np.newaxis]
     y = np.ones(100)
@@ -828,7 +828,7 @@ def test_sample_weight():
 
 
 def test_sample_weight_invalid():
-    """Check sample weighting raises errors."""
+    # Check sample weighting raises errors.
     X = np.arange(100)[:, np.newaxis]
     y = np.ones(100)
     y[:50] = 0.0
@@ -897,7 +897,7 @@ def test_class_weights():
 
 
 def check_class_weight_errors(name):
-    """Test if class_weight raises errors and warnings when expected."""
+    # Test if class_weight raises errors and warnings when expected.
     TreeClassifier = CLF_TREES[name]
     _y = np.vstack((y, np.array(y) * 2)).T
@@ -921,7 +921,7 @@ def test_class_weight_errors():
 
 
 def test_max_leaf_nodes():
-    """Test greedy trees with max_depth + 1 leafs. """
+    # Test greedy trees with max_depth + 1 leaves.
     from sklearn.tree._tree import TREE_LEAF
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     k = 4
@@ -940,7 +940,7 @@ def test_max_leaf_nodes():
 
 
 def test_max_leaf_nodes_max_depth():
-    """Test preceedence of max_leaf_nodes over max_depth. """
+    # Test precedence of max_leaf_nodes over max_depth.
     X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
     k = 4
     for name, TreeEstimator in ALL_TREES.items():
@@ -950,10 +950,8 @@ def test_max_leaf_nodes_max_depth():


 def test_arrays_persist():
-    """Ensure property arrays' memory stays alive when tree disappears
-
-    non-regression for #2726
-    """
+    # Ensure property arrays' memory stays alive when tree disappears
+    # non-regression for #2726
     for attr in ['n_classes', 'value', 'children_left', 'children_right',
                  'threshold', 'impurity', 'feature', 'n_node_samples']:
         value = getattr(DecisionTreeClassifier().fit([[0]], [0]).tree_, attr)
@@ -991,7 +989,7 @@ def test_with_only_one_non_constant_features():


 def test_big_input():
-    """Test if the warning for too large inputs is appropriate."""
+    # Test if the warning for too large inputs is appropriate.
     X = np.repeat(10 ** 40., 4).astype(np.float64).reshape(-1, 1)
     clf = DecisionTreeClassifier()
     try:
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
index 920ce5893b95a..daf3cfbefb83b 100644
--- a/sklearn/utils/tests/test_class_weight.py
+++ b/sklearn/utils/tests/test_class_weight.py
@@ -11,7 +11,7 @@


 def test_compute_class_weight():
-    """Test (and demo) compute_class_weight."""
+    # Test (and demo) compute_class_weight.
     y = np.asarray([2, 2, 2, 3, 3, 4])
     classes = np.unique(y)
     cw = compute_class_weight("auto", classes, y)
@@ -20,14 +20,14 @@ def test_compute_class_weight():


 def test_compute_class_weight_not_present():
-    """Raise error when y does not contain all class labels"""
+    # Raise error when y does not contain all class labels
     classes = np.arange(4)
     y = np.asarray([0, 0, 0, 1, 1, 2])
     assert_raises(ValueError, compute_class_weight, "auto", classes, y)


 def test_compute_class_weight_auto_negative():
-    """Test compute_class_weight when labels are negative"""
+    # Test compute_class_weight when labels are negative
     # Test with balanced class labels.
     classes = np.array([-2, -1, 0])
     y = np.asarray([-1, -1, 0, 0, -2, -2])
@@ -45,7 +45,7 @@ def test_compute_class_weight_auto_negative():


 def test_compute_class_weight_auto_unordered():
-    """Test compute_class_weight when classes are unordered"""
+    # Test compute_class_weight when classes are unordered
     classes = np.array([1, 0, 3])
     y = np.asarray([1, 0, 0, 3, 3, 3])
     cw = compute_class_weight("auto", classes, y)
@@ -55,7 +55,7 @@ def test_compute_class_weight_auto_unordered():


 def test_compute_sample_weight():
-    """Test (and demo) compute_sample_weight."""
+    # Test (and demo) compute_sample_weight.
     # Test with balanced classes
     y = np.asarray([1, 1, 1, 2, 2, 2])
     sample_weight = compute_sample_weight("auto", y)
@@ -97,7 +97,7 @@ def test_compute_sample_weight():


 def test_compute_sample_weight_with_subsample():
-    """Test compute_sample_weight with subsamples specified."""
+    # Test compute_sample_weight with subsamples specified.
     # Test with balanced classes and all samples present
     y = np.asarray([1, 1, 1, 2, 2, 2])
     sample_weight = compute_sample_weight("auto", y, range(6))
@@ -136,7 +136,7 @@ def test_compute_sample_weight_with_subsample():


 def test_compute_sample_weight_errors():
-    """Test compute_sample_weight raises errors expected."""
+    # Test that compute_sample_weight raises the expected errors.
     # Invalid preset string
     y = np.asarray([1, 1, 1, 2, 2, 2])
     y_ = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index 22db7d74f5f94..7c84548b4dce3 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -91,7 +91,7 @@ def test_logsumexp():


 def test_randomized_svd_low_rank():
-    """Check that extmath.randomized_svd is consistent with linalg.svd"""
+    # Check that extmath.randomized_svd is consistent with linalg.svd
     n_samples = 100
     n_features = 500
     rank = 5
@@ -151,7 +151,7 @@ def test_row_norms():


 def test_randomized_svd_low_rank_with_noise():
-    """Check that extmath.randomized_svd can handle noisy matrices"""
+    # Check that extmath.randomized_svd can handle noisy matrices
     n_samples = 100
     n_features = 500
     rank = 5
@@ -183,7 +183,7 @@ def test_randomized_svd_low_rank_with_noise():


 def test_randomized_svd_infinite_rank():
-    """Check that extmath.randomized_svd can handle noisy matrices"""
+    # Check that extmath.randomized_svd can handle matrices of infinite rank
     n_samples = 100
     n_features = 500
     rank = 5
@@ -216,7 +216,7 @@ def test_randomized_svd_infinite_rank():


 def test_randomized_svd_transpose_consistency():
-    """Check that transposing the design matrix has limit impact"""
+    # Check that transposing the design matrix has limited impact
     n_samples = 100
     n_features = 500
     rank = 4
@@ -249,7 +249,7 @@ def test_randomized_svd_transpose_consistency():


 def test_svd_flip():
-    """Check that svd_flip works in both situations, and reconstructs input."""
+    # Check that svd_flip works in both situations, and reconstructs input.
     rs = np.random.RandomState(1999)
     n_samples = 20
     n_features = 10
@@ -286,7 +286,7 @@ def test_randomized_svd_sign_flip():


 def test_cartesian():
-    """Check if cartesian product delivers the right results"""
+    # Check if cartesian product delivers the right results

     axes = (np.array([1, 2, 3]), np.array([4, 5]),
             np.array([6, 7]))
@@ -312,7 +312,7 @@ def test_cartesian():


 def test_logistic_sigmoid():
-    """Check correctness and robustness of logistic sigmoid implementation"""
+    # Check correctness and robustness of logistic sigmoid implementation
     naive_logistic = lambda x: 1 / (1 + np.exp(-x))
     naive_log_logistic = lambda x: np.log(naive_logistic(x))

@@ -324,7 +324,7 @@ def test_logistic_sigmoid():


 def test_fast_dot():
-    """Check fast dot blas wrapper function"""
+    # Check the fast_dot BLAS wrapper function
     if fast_dot is np.dot:
         return

@@ -402,7 +402,7 @@ def test_fast_dot():


 def test_incremental_variance_update_formulas():
-    """Test Youngs and Cramer incremental variance formulas."""
+    # Test Youngs and Cramer incremental variance formulas.
     # Doggie data from http://www.mathsisfun.com/data/standard-deviation.html
     A = np.array([[600, 470, 170, 430, 300],
                   [600, 470, 170, 430, 300],
@@ -423,7 +423,7 @@ def test_incremental_variance_update_formulas():


 def test_incremental_variance_ddof():
-    """Test that degrees of freedom parameter for calculations are correct."""
+    # Test that degrees of freedom parameter for calculations are correct.
     rng = np.random.RandomState(1999)
     X = rng.randn(50, 10)
     n_samples, n_features = X.shape
diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py
index fde18d7c8999e..3580aa6acbff7 100644
--- a/sklearn/utils/tests/test_fixes.py
+++ b/sklearn/utils/tests/test_fixes.py
@@ -13,7 +13,7 @@


 def test_expit():
-    """Check numerical stability of expit (logistic function)."""
+    # Check numerical stability of expit (logistic function).

     # Simulate our previous Cython implementation, based on
     #http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index d5f6b695e04af..efd0d9ab7e920 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -209,7 +209,7 @@ def test_unique_labels():

 @ignore_warnings
 def test_unique_labels_non_specific():
-    """Test unique_labels with a variety of collected examples"""
+    # Test unique_labels with a variety of collected examples

     # Smoke test for all supported format
     for format in ["binary", "multiclass", "multilabel-sequences",
diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py
index cc07ed734c105..a2db52b78bec2 100644
--- a/sklearn/utils/tests/test_sparsefuncs.py
+++ b/sklearn/utils/tests/test_sparsefuncs.py
@@ -362,7 +362,7 @@ def test_count_nonzero():


 def test_csc_row_median():
-    """Test csc_row_median actually calculates the median."""
+    # Test csc_row_median actually calculates the median.

     # Test that it gives the same output when X is dense.
     rng = np.random.RandomState(0)
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index 34e560d08561a..8ad43b5dfc5a3 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -20,7 +20,7 @@


 def test_make_rng():
-    """Check the check_random_state utility function behavior"""
+    # Check the check_random_state utility function behavior
     assert_true(check_random_state(None) is np.random.mtrand._rand)
     assert_true(check_random_state(np.random) is np.random.mtrand._rand)

@@ -37,12 +37,12 @@ def test_make_rng():


 def test_resample_noarg():
-    """Border case not worth mentioning in doctests"""
+    # Border case not worth mentioning in doctests
     assert_true(resample() is None)


 def test_deprecated():
-    """Test whether the deprecated decorator issues appropriate warnings"""
+    # Test whether the deprecated decorator issues appropriate warnings
     # Copied almost verbatim from http://docs.python.org/library/warnings.html

     # First a function...
@@ -79,7 +79,7 @@ class Ham(object):


 def test_resample_value_errors():
-    """Check that invalid arguments yield ValueError"""
+    # Check that invalid arguments yield ValueError
     assert_raises(ValueError, resample, [0], [0, 1])
     assert_raises(ValueError, resample, [0, 1], [0, 1], n_samples=3)
     assert_raises(ValueError, resample, [0, 1], [0, 1], meaning_of_life=42)
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 0a35f76047ab5..b1317e3d5a5ef 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -30,7 +30,7 @@


 def test_as_float_array():
-    """Test function for as_float_array"""
+    # Test function for as_float_array
     X = np.ones((3, 10), dtype=np.int32)
     X = X + np.arange(10, dtype=np.int32)
     # Checks that the return type is ok
@@ -63,7 +63,7 @@ def test_as_float_array():


 def test_np_matrix():
-    """Confirm that input validation code does not return np.matrix"""
+    # Confirm that input validation code does not return np.matrix
     X = np.arange(12).reshape(3, 4)

     assert_false(isinstance(as_float_array(X), np.matrix))
@@ -72,7 +72,7 @@ def test_np_matrix():


 def test_memmap():
-    """Confirm that input validation code doesn't copy memory mapped arrays"""
+    # Confirm that input validation code doesn't copy memory mapped arrays

     asflt = lambda x: as_float_array(x, copy=False)

@@ -88,11 +88,9 @@ def test_memmap():


 def test_ordering():
-    """Check that ordering is enforced correctly by validation utilities.
-
-    We need to check each validation utility, because a 'copy' without
-    'order=K' will kill the ordering.
-    """
+    # Check that ordering is enforced correctly by validation utilities.
+    # We need to check each validation utility, because a 'copy' without
+    # 'order=K' will kill the ordering.
     X = np.ones((10, 5))
     for A in X, X.T:
         for copy in (True, False):