diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py
index 2ad49833641dc..c952831d85e1f 100644
--- a/sklearn/impute/_base.py
+++ b/sklearn/impute/_base.py
@@ -126,7 +126,7 @@ class SimpleImputer(_BaseImputer):
         The placeholder for the missing values. All occurrences of
         `missing_values` will be imputed.
 
-    strategy : string, optional (default="mean")
+    strategy : string, default='mean'
         The imputation strategy.
 
         - If "mean", then replace missing values using the mean along
@@ -141,16 +141,16 @@ class SimpleImputer(_BaseImputer):
         .. versionadded:: 0.20
            strategy="constant" for fixed value imputation.
 
-    fill_value : string or numerical value, optional (default=None)
+    fill_value : string or numerical value, default=None
         When strategy == "constant", fill_value is used to replace all
         occurrences of missing_values.
         If left to the default, fill_value will be 0 when imputing numerical
         data and "missing_value" for strings or object data types.
 
-    verbose : integer, optional (default=0)
+    verbose : integer, default=0
         Controls the verbosity of the imputer.
 
-    copy : boolean, optional (default=True)
+    copy : boolean, default=True
         If True, a copy of X will be created. If False, imputation will
         be done in-place whenever possible. Note that, in the following cases,
         a new copy will always be made, even if `copy=False`:
@@ -159,7 +159,7 @@ class SimpleImputer(_BaseImputer):
         - If X is encoded as a CSR matrix;
         - If add_indicator=True.
 
-    add_indicator : boolean, optional (default=False)
+    add_indicator : boolean, default=False
         If True, a :class:`MissingIndicator` transform will stack onto output
         of the imputer's transform. This allows a predictive estimator
         to account for missingness despite imputation. If a feature has no
@@ -470,7 +470,7 @@ class MissingIndicator(TransformerMixin, BaseEstimator):
         `missing_values` will be indicated (True in the output array), the
         other values will be marked as False.
 
-    features : str, optional
+    features : str, default='missing-only'
         Whether the imputer mask should represent all or a subset of
         features.
 
@@ -478,7 +478,7 @@ class MissingIndicator(TransformerMixin, BaseEstimator):
           features containing missing values during fit time.
         - If "all", the imputer mask will represent all features.
 
-    sparse : boolean or "auto", optional
+    sparse : boolean or "auto", default='auto'
         Whether the imputer mask format should be sparse or dense.
 
         - If "auto" (default), the imputer mask will be of same type as
@@ -486,7 +486,7 @@ class MissingIndicator(TransformerMixin, BaseEstimator):
         - If True, the imputer mask will be a sparse matrix.
         - If False, the imputer mask will be a numpy array.
 
-    error_on_new : boolean, optional
+    error_on_new : boolean, default=True
         If True (default), transform will raise an error when there are
         features with missing values in transform that have no missing values
         in fit. This is applicable only when ``features="missing-only"``.
diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py
index fa9d576f04008..7983b8dbe4062 100644
--- a/sklearn/impute/_iterative.py
+++ b/sklearn/impute/_iterative.py
@@ -52,7 +52,7 @@ class IterativeImputer(_BaseImputer):
         If ``sample_posterior`` is True, the estimator must support
         ``return_std`` in its ``predict`` method.
 
-    missing_values : int, np.nan, optional (default=np.nan)
+    missing_values : int, np.nan, default=np.nan
         The placeholder for the missing values. All occurrences of
         ``missing_values`` will be imputed.
 
@@ -62,7 +62,7 @@ class IterativeImputer(_BaseImputer):
         ``return_std`` in its ``predict`` method if set to ``True``. Set to
         ``True`` if using ``IterativeImputer`` for multiple imputations.
 
-    max_iter : int, optional (default=10)
+    max_iter : int, default=10
         Maximum number of imputation rounds to perform before returning the
         imputations computed during the final round. A round is a single
         imputation of each feature with missing values. The stopping criterion
@@ -70,10 +70,10 @@ class IterativeImputer(_BaseImputer):
         where `X_t` is `X` at iteration `t. Note that early stopping is only
         applied if ``sample_posterior=False``.
 
-    tol : float, optional (default=1e-3)
+    tol : float, default=1e-3
         Tolerance of the stopping condition.
 
-    n_nearest_features : int, optional (default=None)
+    n_nearest_features : int, default=None
         Number of other features to use to estimate the missing values of
         each feature column. Nearness between features is measured using
         the absolute correlation coefficient between each feature pair (after
@@ -83,12 +83,12 @@ class IterativeImputer(_BaseImputer):
         imputed target feature. Can provide significant speed-up when the
         number of features is huge. If ``None``, all features will be used.
 
-    initial_strategy : str, optional (default="mean")
+    initial_strategy : str, default='mean'
         Which strategy to use to initialize the missing values. Same as the
         ``strategy`` parameter in :class:`sklearn.impute.SimpleImputer`
         Valid values: {"mean", "median", "most_frequent", or "constant"}.
 
-    imputation_order : str, optional (default="ascending")
+    imputation_order : str, default='ascending'
         The order in which the features will be imputed. Possible values:
 
         "ascending"
@@ -102,34 +102,34 @@ class IterativeImputer(_BaseImputer):
         "random"
             A random order for each round.
 
-    skip_complete : boolean, optional (default=False)
+    skip_complete : boolean, default=False
         If ``True`` then features with missing values during ``transform``
         which did not have any missing values during ``fit`` will be imputed
         with the initial imputation method only. Set to ``True`` if you have
         many features with no missing values at both ``fit`` and ``transform``
         time to save compute.
 
-    min_value : float, optional (default=None)
+    min_value : float, default=None
         Minimum possible imputed value. Default of ``None`` will set minimum
         to negative infinity.
 
-    max_value : float, optional (default=None)
+    max_value : float, default=None
         Maximum possible imputed value. Default of ``None`` will set maximum
         to positive infinity.
 
-    verbose : int, optional (default=0)
+    verbose : int, default=0
         Verbosity flag, controls the debug messages that are issued
         as functions are evaluated. The higher, the more verbose. Can be 0, 1,
         or 2.
 
-    random_state : int, RandomState instance or None, optional (default=None)
+    random_state : int, RandomState instance or None, default=None
         The seed of the pseudo random number generator to use. Randomizes
         selection of estimator features if n_nearest_features is not None, the
         ``imputation_order`` if ``random``, and the sampling from posterior if
         ``sample_posterior`` is True. Use an integer for determinism.
         See :term:`the Glossary <random_state>`.
 
-    add_indicator : boolean, optional (default=False)
+    add_indicator : boolean, default=False
         If True, a :class:`MissingIndicator` transform will stack onto output
         of the imputer's transform. This allows a predictive estimator
         to account for missingness despite imputation. If a feature has no
@@ -443,7 +443,7 @@ def _get_abs_corr_mat(self, X_filled, tolerance=1e-6):
         X_filled : ndarray, shape (n_samples, n_features)
             Input data with the most recent imputations.
 
-        tolerance : float, optional (default=1e-6)
+        tolerance : float, default=1e-6
             ``abs_corr_mat`` can have nans, which will be replaced
             with ``tolerance``.