DOC Typos found by codespell #21069


Merged 5 commits on Sep 17, 2021
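For reference, the typos below were reported by the codespell checker named in the PR title. The exact command and flags used for this PR are not recorded on this page, so the following is only an assumed, typical local invocation over the directories touched here:

    # Assumed example, not taken from the PR itself:
    # install the spell checker and scan the affected directories
    pip install codespell
    codespell benchmarks build_tools doc examples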
2 changes: 1 addition & 1 deletion benchmarks/bench_mnist.py
@@ -6,7 +6,7 @@
Benchmark on the MNIST dataset. The dataset comprises 70,000 samples
and 784 features. Here, we consider the task of predicting
10 classes - digits from 0 to 9 from their raw images. By contrast to the
-covertype dataset, the feature space is homogenous.
+covertype dataset, the feature space is homogeneous.

Example of output :
[..]
6 changes: 3 additions & 3 deletions benchmarks/bench_random_projections.py
@@ -43,10 +43,10 @@ def compute_time(t_start, delta):
return delta.seconds + delta.microseconds / mu_second


-def bench_scikit_transformer(X, transfomer):
+def bench_scikit_transformer(X, transformer):
gc.collect()

-clf = clone(transfomer)
+clf = clone(transformer)

# start time
t_start = datetime.now()
@@ -195,7 +195,7 @@ def print_row(clf_type, time_fit, time_transform):
###########################################################################
n_nonzeros = int(opts.ratio_nonzeros * opts.n_features)

print("Dataset statics")
print("Dataset statistics")
print("===========================")
print("n_samples \t= %s" % opts.n_samples)
print("n_features \t= %s" % opts.n_features)
2 changes: 1 addition & 1 deletion build_tools/azure/posix-docker.yml
@@ -39,7 +39,7 @@ jobs:
${{ insert }}: ${{ parameters.matrix }}

steps:
-# Container is detached and sleeping, allowing steps to run commmands
+# Container is detached and sleeping, allowing steps to run commands
# in the container. The TEST_DIR is mapped allowing the host to access
# the JUNITXML file
- script: >
2 changes: 1 addition & 1 deletion build_tools/circle/list_versions.py
@@ -34,7 +34,7 @@ def human_readable_data_quantity(quantity, multiple=1024):

def get_file_extension(version):
if "dev" in version:
-# The 'dev' branch should be explictly handled
+# The 'dev' branch should be explicitly handled
return "zip"

current_version = LooseVersion(version)
2 changes: 1 addition & 1 deletion build_tools/shared.sh
@@ -5,7 +5,7 @@ get_dep() {
# do not install with none
echo
elif [[ "${version%%[^0-9.]*}" ]]; then
-# version number is explicity passed
+# version number is explicitly passed
echo "$package==$version"
elif [[ "$version" == "latest" ]]; then
# use latest
2 changes: 1 addition & 1 deletion doc/common_pitfalls.rst
@@ -560,7 +560,7 @@ bad performance. Similarly, we want a random forest to be robust w.r.t the
set of randomly selected features that each tree will be using.

For these reasons, it is preferable to evaluate the cross-validation
-preformance by letting the estimator use a different RNG on each fold. This
+performance by letting the estimator use a different RNG on each fold. This
is done by passing a `RandomState` instance (or `None`) to the estimator
initialization.

2 changes: 1 addition & 1 deletion doc/conf.py
@@ -240,7 +240,7 @@
"release_highlights"
] = f"auto_examples/release_highlights/{latest_highlights}"

-# get version from higlight name assuming highlights have the form
+# get version from highlight name assuming highlights have the form
# plot_release_highlights_0_22_0
highlight_version = ".".join(latest_highlights.split("_")[-3:-1])
html_context["release_highlights_version"] = highlight_version
2 changes: 1 addition & 1 deletion doc/developers/advanced_installation.rst
@@ -374,7 +374,7 @@ isolation from the Python packages installed via the system packager. When
using an isolated environment, ``pip3`` should be replaced by ``pip`` in the
above commands.

-When precompiled wheels of the runtime dependencies are not avalaible for your
+When precompiled wheels of the runtime dependencies are not available for your
architecture (e.g. ARM), you can install the system versions:

.. prompt:: bash $
2 changes: 1 addition & 1 deletion doc/developers/contributing.rst
@@ -1004,7 +1004,7 @@ installed in your current Python environment:

asv run --python=same

-It's particulary useful when you installed scikit-learn in editable mode to
+It's particularly useful when you installed scikit-learn in editable mode to
avoid creating a new environment each time you run the benchmarks. By default
the results are not saved when using an existing installation. To save the
results you must specify a commit hash:
4 changes: 2 additions & 2 deletions doc/developers/maintainer.rst
@@ -33,7 +33,7 @@ Before a release

- ``maint_tools/sort_whats_new.py`` can put what's new entries into
sections. It's not perfect, and requires manual checking of the changes.
-If the whats new list is well curated, it may not be necessary.
+If the what's new list is well curated, it may not be necessary.

- The ``maint_tools/whats_missing.sh`` script may be used to identify pull
requests that were merged but likely missing from What's New.
@@ -198,7 +198,7 @@ Making a release
`Continuous Integration
<https://en.wikipedia.org/wiki/Continuous_integration>`_. The CD workflow on
GitHub Actions is also used to automatically create nightly builds and
-publish packages for the developement branch of scikit-learn. See
+publish packages for the development branch of scikit-learn. See
:ref:`install_nightly_builds`.

4. Once all the CD jobs have completed successfully in the PR, merge it,
4 changes: 2 additions & 2 deletions doc/install.rst
@@ -158,7 +158,7 @@ Installing on Apple Silicon M1 hardware

The recently introduced `macos/arm64` platform (sometimes also known as
`macos/aarch64`) requires the open source community to upgrade the build
-configuation and automation to properly support it.
+configuration and automation to properly support it.

At the time of writing (January 2021), the only way to get a working
installation of scikit-learn on this hardware is to install scikit-learn and its
@@ -204,7 +204,7 @@ It can be installed by typing the following command:
Debian/Ubuntu
-------------

-The Debian/Ubuntu package is splitted in three different packages called
+The Debian/Ubuntu package is split in three different packages called
``python3-sklearn`` (python modules), ``python3-sklearn-lib`` (low-level
implementations and bindings), ``python3-sklearn-doc`` (documentation).
Only the Python 3 version is available in the Debian Buster (the more recent
2 changes: 1 addition & 1 deletion doc/modules/compose.rst
@@ -573,7 +573,7 @@ many estimators. This visualization is activated by setting the

>>> from sklearn import set_config
>>> set_config(display='diagram') # doctest: +SKIP
->>> # diplays HTML representation in a jupyter context
+>>> # displays HTML representation in a jupyter context
>>> column_trans # doctest: +SKIP

An example of the HTML output can be seen in the
2 changes: 1 addition & 1 deletion doc/modules/cross_decomposition.rst
@@ -64,7 +64,7 @@ Set :math:`X_1` to :math:`X` and :math:`Y_1` to :math:`Y`. Then, for each
:math:`C = X_k^T Y_k`.
:math:`u_k` and :math:`v_k` are called the *weights*.
By definition, :math:`u_k` and :math:`v_k` are
-choosen so that they maximize the covariance between the projected
+chosen so that they maximize the covariance between the projected
:math:`X_k` and the projected target, that is :math:`\text{Cov}(X_k u_k,
Y_k v_k)`.
- b) Project :math:`X_k` and :math:`Y_k` on the singular vectors to obtain
2 changes: 1 addition & 1 deletion doc/modules/cross_validation.rst
@@ -974,7 +974,7 @@ test is therefore only able to show when the model reliably outperforms
random guessing.

Finally, :func:`~sklearn.model_selection.permutation_test_score` is computed
-using brute force and interally fits ``(n_permutations + 1) * n_cv`` models.
+using brute force and internally fits ``(n_permutations + 1) * n_cv`` models.
It is therefore only tractable with small datasets for which fitting an
individual model is very fast.

2 changes: 1 addition & 1 deletion doc/modules/decomposition.rst
@@ -829,7 +829,7 @@ and the intensity of the regularization with the :attr:`alpha_W` and :attr:`alph
(:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are scaled by the number
of samples (:math:`n\_samples`) for `H` and the number of features (:math:`n\_features`)
for `W` to keep their impact balanced with respect to one another and to the data fit
-term as independant as possible of the size of the training set. Then the priors terms
+term as independent as possible of the size of the training set. Then the priors terms
are:

.. math::
4 changes: 2 additions & 2 deletions doc/modules/lda_qda.rst
@@ -187,7 +187,7 @@ an estimate for the covariance matrix). Setting this parameter to a value
between these two extrema will estimate a shrunk version of the covariance
matrix.

-The shrinked Ledoit and Wolf estimator of covariance may not always be the
+The shrunk Ledoit and Wolf estimator of covariance may not always be the
best choice. For example if the distribution of the data
is normally distributed, the
Oracle Shrinkage Approximating estimator :class:`sklearn.covariance.OAS`
@@ -234,7 +234,7 @@ For QDA, the use of the SVD solver relies on the fact that the covariance
matrix :math:`\Sigma_k` is, by definition, equal to :math:`\frac{1}{n - 1}
X_k^tX_k = \frac{1}{n - 1} V S^2 V^t` where :math:`V` comes from the SVD of the (centered)
matrix: :math:`X_k = U S V^t`. It turns out that we can compute the
-log-posterior above without having to explictly compute :math:`\Sigma`:
+log-posterior above without having to explicitly compute :math:`\Sigma`:
computing :math:`S` and :math:`V` via the SVD of :math:`X` is enough. For
LDA, two SVDs are computed: the SVD of the centered input matrix :math:`X`
and the SVD of the class-wise mean vectors.
2 changes: 1 addition & 1 deletion doc/modules/model_evaluation.rst
@@ -2381,7 +2381,7 @@ of 0.0.
A scorer object with a specific choice of ``power`` can be built by::

>>> from sklearn.metrics import d2_tweedie_score, make_scorer
->>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, pwoer=1.5)
+>>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5)

.. _pinball_loss:

2 changes: 1 addition & 1 deletion doc/modules/outlier_detection.rst
@@ -332,7 +332,7 @@ chosen 1) greater than the minimum number of objects a cluster has to contain,
so that other objects can be local outliers relative to this cluster, and 2)
smaller than the maximum number of close by objects that can potentially be
local outliers.
-In practice, such informations are generally not available, and taking
+In practice, such information is generally not available, and taking
n_neighbors=20 appears to work well in general.
When the proportion of outliers is high (i.e. greater than 10 \%, as in the
example below), n_neighbors should be greater (n_neighbors=35 in the example
4 changes: 2 additions & 2 deletions doc/modules/sgd.rst
@@ -123,7 +123,7 @@ Please refer to the :ref:`mathematical section below
The first two loss functions are lazy, they only update the model
parameters if an example violates the margin constraint, which makes
training very efficient and may result in sparser models (i.e. with more zero
-coefficents), even when L2 penalty is used.
+coefficients), even when L2 penalty is used.

Using ``loss="log"`` or ``loss="modified_huber"`` enables the
``predict_proba`` method, which gives a vector of probability estimates
@@ -408,7 +408,7 @@ parameters, we minimize the regularized training error given by
where :math:`L` is a loss function that measures model (mis)fit and
:math:`R` is a regularization term (aka penalty) that penalizes model
complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls
-the regularization stength.
+the regularization strength.

Different choices for :math:`L` entail different classifiers or regressors:

2 changes: 1 addition & 1 deletion doc/modules/svm.rst
@@ -623,7 +623,7 @@ misclassified or within the margin boundary. Ideally, the value :math:`y_i
(w^T \phi (x_i) + b)` would be :math:`\geq 1` for all samples, which
indicates a perfect prediction. But problems are usually not always perfectly
separable with a hyperplane, so we allow some samples to be at a distance :math:`\zeta_i` from
-their correct margin boundary. The penalty term `C` controls the strengh of
+their correct margin boundary. The penalty term `C` controls the strength of
this penalty, and as a result, acts as an inverse regularization parameter
(see note below).

2 changes: 1 addition & 1 deletion doc/roadmap.rst
@@ -51,7 +51,7 @@ external to the core library.
(i.e. rectangular data largely invariant to column and row order;
predicting targets with simple structure)
* improve the ease for users to develop and publish external components
-* improve inter-operability with modern data science tools (e.g. Pandas, Dask)
+* improve interoperability with modern data science tools (e.g. Pandas, Dask)
and infrastructures (e.g. distributed processing)

Many of the more fine-grained goals can be found under the `API tag
2 changes: 1 addition & 1 deletion doc/themes/scikit-learn-modern/static/css/theme.css
@@ -1237,7 +1237,7 @@ table.sk-sponsor-table td {
text-align: center
}

-/* pygments - highlightning */
+/* pygments - highlighting */

.highlight .hll { background-color: #ffffcc }
.highlight { background: #f8f8f8; }
2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/ML_MAPS_README.txt
@@ -7,7 +7,7 @@ by Andreas Mueller:

(https://peekaboo-vision.blogspot.de/2013/01/machine-learning-cheat-sheet-for-scikit.html)

-The image is made interactive using an imagemap, and uses the jQuery Map Hilight plugin module
+The image is made interactive using an imagemap, and uses the jQuery Map Highlight plugin module
by David Lynch (https://davidlynch.org/projects/maphilight/docs/) to highlight
the different items on the image upon mouseover.

2 changes: 1 addition & 1 deletion doc/tutorial/machine_learning_map/pyparsing.py
@@ -2836,7 +2836,7 @@ class QuotedString(Token):
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
super(QuotedString,self).__init__()

-# remove white space from quote chars - wont work anyway
+# remove white space from quote chars - won't work anyway
quoteChar = quoteChar.strip()
if not quoteChar:
warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
2 changes: 1 addition & 1 deletion doc/whats_new/v0.16.rst
@@ -54,7 +54,7 @@ Highlights

- Out-of core learning of PCA via :class:`decomposition.IncrementalPCA`.

-- Probability callibration of classifiers using
+- Probability calibration of classifiers using
:class:`calibration.CalibratedClassifierCV`.

- :class:`cluster.Birch` clustering method for large-scale datasets.
2 changes: 1 addition & 1 deletion doc/whats_new/v0.20.rst
@@ -1286,7 +1286,7 @@ Support for Python 3.3 has been officially dropped.
be used for novelty detection, i.e. predict on new unseen data. Available
prediction methods are ``predict``, ``decision_function`` and
``score_samples``. By default, ``novelty`` is set to ``False``, and only
-the ``fit_predict`` method is avaiable.
+the ``fit_predict`` method is available.
By :user:`Albert Thomas <albertcthomas>`.

- |Fix| Fixed a bug in :class:`neighbors.NearestNeighbors` where fitting a
2 changes: 1 addition & 1 deletion doc/whats_new/v0.21.rst
@@ -1060,7 +1060,7 @@ These changes mostly affect library developers.

- Add ``check_fit_idempotent`` to
:func:`~utils.estimator_checks.check_estimator`, which checks that
-when `fit` is called twice with the same data, the ouput of
+when `fit` is called twice with the same data, the output of
`predict`, `predict_proba`, `transform`, and `decision_function` does not
change. :pr:`12328` by :user:`Nicolas Hug <NicolasHug>`

2 changes: 1 addition & 1 deletion doc/whats_new/v0.23.rst
@@ -341,7 +341,7 @@ Changelog
:pr:`16006` by :user:`Rushabh Vasani <rushabh-v>`.

- |API| The `StreamHandler` was removed from `sklearn.logger` to avoid
-double logging of messages in common cases where a hander is attached
+double logging of messages in common cases where a handler is attached
to the root logger, and to follow the Python logging documentation
recommendation for libraries to leave the log message handling to
users and application code. :pr:`16451` by :user:`Christoph Deil <cdeil>`.
2 changes: 1 addition & 1 deletion doc/whats_new/v0.24.rst
@@ -713,7 +713,7 @@ Changelog
:user:`Joseph Willard <josephwillard>`

- |Fix| bug in :func:`metrics.hinge_loss` where error occurs when
-``y_true`` is missing some labels that are provided explictly in the
+``y_true`` is missing some labels that are provided explicitly in the
``labels`` parameter.
:pr:`17935` by :user:`Cary Goltermann <Ultramann>`.

2 changes: 1 addition & 1 deletion examples/applications/plot_cyclical_feature_engineering.py
@@ -215,7 +215,7 @@
# %%
#
# Lets evaluate our gradient boosting model with the mean absolute error of the
-# relative demand averaged accross our 5 time-based cross-validation splits:
+# relative demand averaged across our 5 time-based cross-validation splits:


def evaluate(model, X, y, cv):
2 changes: 1 addition & 1 deletion examples/calibration/plot_calibration_multiclass.py
@@ -178,7 +178,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3).
print(f" * calibrated classifier: {cal_score:.3f}")

# %%
-# Finally we generate a grid of possibile uncalibrated probabilities over
+# Finally we generate a grid of possible uncalibrated probabilities over
# the 2-simplex, compute the corresponding calibrated probabilities and
# plot arrows for each. The arrows are colored according the highest
# uncalibrated probability. This illustrates the learned calibration map:
2 changes: 1 addition & 1 deletion examples/covariance/plot_mahalanobis_distances.py
@@ -70,7 +70,7 @@
# are Gaussian distributed with mean of 0 but feature 1 has a standard
# deviation equal to 2 and feature 2 has a standard deviation equal to 1. Next,
# 25 samples are replaced with Gaussian outlier samples where feature 1 has
-# a standard devation equal to 1 and feature 2 has a standard deviation equal
+# a standard deviation equal to 1 and feature 2 has a standard deviation equal
# to 7.

import numpy as np
2 changes: 1 addition & 1 deletion examples/cross_decomposition/plot_pcr_vs_pls.py
@@ -134,7 +134,7 @@
#
# On the other hand, the PLS regressor manages to capture the effect of the
# direction with the lowest variance, thanks to its use of target information
-# during the transformation: it can recogize that this direction is actually
+# during the transformation: it can recognize that this direction is actually
# the most predictive. We note that the first PLS component is negatively
# correlated with the target, which comes from the fact that the signs of
# eigenvectors are arbitrary.
4 changes: 2 additions & 2 deletions examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -17,7 +17,7 @@
model is trained using the training set and evaluated using the validation set.
When each additional stage of regression tree is added, the validation set is
used to score the model. This is continued until the scores of the model in
-the last ``n_iter_no_change`` stages do not improve by atleast `tol`. After
+the last ``n_iter_no_change`` stages do not improve by at least `tol`. After
that the model is considered to have converged and further addition of stages
is "stopped early".

@@ -64,7 +64,7 @@
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=0)

-# We specify that if the scores don't improve by atleast 0.01 for the last
+# We specify that if the scores don't improve by at least 0.01 for the last
# 10 stages, stop fitting additional stages
gbes = ensemble.GradientBoostingClassifier(n_estimators=n_estimators,
validation_fraction=0.2,
2 changes: 1 addition & 1 deletion examples/ensemble/plot_gradient_boosting_quantile.py
@@ -184,7 +184,7 @@ def highlight_min(x):
# the fact the squared error estimator is very sensitive to large outliers
# which can cause significant overfitting. This can be seen on the right hand
# side of the previous plot. The conditional median estimator is biased
-# (underestimation for this asymetric noise) but is also naturally robust to
+# (underestimation for this asymmetric noise) but is also naturally robust to
# outliers and overfits less.
#
# Calibration of the confidence interval
@@ -354,7 +354,7 @@

# %%
# Two regions are populated: when the EXPERIENCE coefficient is
-# positive the AGE one is negative and viceversa.
+# positive the AGE one is negative and vice-versa.
#
# To go further we remove one of the 2 features and check what is the impact
# on the model stability.
@@ -664,7 +664,7 @@
# It is important to keep in mind that the coefficients that have been
# dropped may still be related to the outcome by themselves: the model
# chose to suppress them because they bring little or no additional
-# information on top of the other features. Additionnaly, this selection
+# information on top of the other features. Additionally, this selection
# is unstable for correlated features, and should be interpreted with
# caution.
#