From 9dbfbc85ef43487e6e032f9b772ebd9ae8c8c161 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 15:07:06 -0700 Subject: [PATCH 001/127] added a function with confusion matrix derived metrics (fpr, tpr, tnr, fnr) --- sklearn/metrics/__init__.py | 1 + sklearn/metrics/_classification.py | 203 +++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 8bcb047ec8161..b9d7ec3bd0a9e 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -33,6 +33,7 @@ from ._classification import zero_one_loss from ._classification import brier_score_loss from ._classification import multilabel_confusion_matrix +from ._classification import tpr_fpr_tnr_fnr_scores from . import cluster from .cluster import adjusted_mutual_info_score diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2ceccca65203e..d4b0f4c7347b6 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1504,8 +1504,211 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum +<<<<<<< HEAD @_deprecate_positional_args def precision_score(y_true, y_pred, *, labels=None, pos_label=1, +======= +def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=None, + warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None,zero_division="warn"): + """Compute TPR, FPR, TNR, FNR for each class + + The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of + true positives and ``fn`` the number of false negatives. + + The FPR is the ratio ``fp / (tn + fp)`` where ``tn`` is the number of + true negatives and ``fp`` the number of false positives. + + The TNR is the ratio ``tn / (tn + fp)`` where ``tn`` is the number of + true negatives and ``fp`` the number of false positives. + + The FNR is the ratio ``fn / (tp + fn)`` where ``tp`` is the number of + true positives and ``fn`` the number of false negatives. + + If ``pos_label is None`` and in binary classification, this function + returns the average precision, recall and F-measure if ``average`` + is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``. + + Parameters + ---------- + y_true : 1d array-like, or label indicator array / sparse matrix + Ground truth (correct) target values. + + y_pred : 1d array-like, or label indicator array / sparse matrix + Estimated targets as returned by a classifier. + + labels : list, optional + The set of labels to include when ``average != 'binary'``, and their + order if ``average is None``. Labels present in the data can be + excluded, for example to calculate a multiclass average ignoring a + majority negative class, while labels not present in the data will + result in 0 components in a macro average. For multilabel targets, + labels are column indices. By default, all labels in ``y_true`` and + ``y_pred`` are used in sorted order. + + pos_label : str or int, 1 by default + The class to report if ``average='binary'`` and the data is binary. + If the data are multiclass or multilabel, this will be ignored; + setting ``labels=[pos_label]`` and ``average != 'binary'`` will report + scores for that label only. + + average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ + 'weighted'] + If ``None``, the scores for each class are returned. 
Otherwise, this + determines the type of averaging performed on the data: + + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + + warn_for : tuple or set, for internal use + This determines which warnings will be made in the case that this + function is being used to return only one of its metrics. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. + + zero_division : "warn", 0 or 1, default="warn" + Sets the value to return when there is a zero division: + - recall: when there are no positive labels + - precision: when there are no positive predictions + - f-score: both + + If set to "warn", this acts as 0, but warnings are also raised. + + Returns + ------- + tpr : float (if average is not None) or array of float, shape =\ + [n_unique_labels] + + fpr : float (if average is not None) or array of float, , shape =\ + [n_unique_labels] + + tnr : float (if average is not None) or array of float, shape =\ + [n_unique_labels] + + fnr : float (if average is not None) or array of float, shape =\ + [n_unique_labels] + The number of occurrences of each label in ``y_true``. + + References + ---------- + .. [1] `Wikipedia entry for confusion matrix + `_ + + .. [2] `Discriminative Methods for Multi-labeled Classification Advances + in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu + Godbole, Sunita Sarawagi + `_ + + Examples + -------- + >>> import numpy as np + >>> from sklearn.metrics import precision_recall_fscore_support + >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) + >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') + (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') + (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') + (0.22..., 0.33..., 0.26..., None) + + It is possible to compute per-label fpr, fnr, tnr, tpr and + supports instead of averaging: + + >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, + ... labels=['pig', 'dog', 'cat']) + (array([0, 0, 1]), array([0, 0, 0]), + array([0, 0, 0]), array([1, 1, 0])) + + Notes + ----- + When ``true positive + false negative == 0``, TPR, FNR are not undefined; + When ``true negative + false positive == 0``, FPR, TNR are not undefined. + In such cases, by default the metric will be set to 0, as will f-score, + and ``UndefinedMetricWarning`` will be raised. This behavior can be + modified with ``zero_division``. 
+ """ + _check_zero_division(zero_division) + + labels = _check_set_wise_labels(y_true, y_pred, average, labels, + pos_label) + + # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum ### + samplewise = average == 'samples' + MCM = multilabel_confusion_matrix(y_true, y_pred, + sample_weight=sample_weight, + labels=labels, samplewise=samplewise) + tn_sum = MCM[:, 0, 0] + fp_sum = MCM[:, 0, 1] + fn_sum = MCM[:, 1, 0] + tp_sum = MCM[:, 1, 1] + pred_sum = tp_sum + MCM[:, 0, 1] + neg_sum = tn_sum+fp_sum + pos_sum = fn_sum+tp_sum + + + if average == 'micro': + fp_sum = np.array([fp_sum.sum()]) + tn_sum = np.array([tn_sum.sum()]) + fn_sum = np.array([fn_sum.sum()]) + neg_sum = np.array([neg_sum.sum()]) + pos_sum = np.array([pos_sum.sum()]) + + # Divide, and on zero-division, set scores and/or warn according to + # zero_division: + fpr = _prf_divide(fp_sum, neg_sum, 'fpr', + 'negatives', average, warn_for, zero_division) + tnr = _prf_divide(tn_sum, neg_sum, 'tnr', + 'negatives', average, warn_for, zero_division) + fnr = _prf_divide(fn_sum, pos_sum, 'fnr', + 'positives', average, warn_for, zero_division) + tpr = _prf_divide(tp_sum, pos_sum, 'tpr', + 'positives', average, warn_for, zero_division) + + # Average the results + if average == 'weighted': + weights = pos_sum + if weights.sum() == 0: + zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 + # precision is zero_division if there are no positive predictions + # recall is zero_division if there are no positive labels + # fscore is zero_division if all labels AND predictions are + # negative + return (zero_division_value if pred_sum.sum() == 0 else 0, + zero_division_value, + zero_division_value if pred_sum.sum() == 0 else 0) + + elif average == 'samples': + weights = sample_weight + else: + weights = None + + if average is not None: + assert average != 'binary' or len(fpr) == 1 + fpr = np.average(fpr, weights=weights) + tnr = np.average(tnr, weights=weights) + fnr = np.average(fnr, weights=weights) + tpr = np.average(tpr, weights=weights) + return tpr, fpr, tnr, fnr + + +def precision_score(y_true, y_pred, labels=None, pos_label=1, average='binary', sample_weight=None, zero_division="warn"): """Compute the precision From 64a5a7b2fd6069726ca54fe2c2472f450c7fcbe2 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 16:07:58 -0700 Subject: [PATCH 002/127] changed the true postive sum in the function --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index d4b0f4c7347b6..43a246c24f36e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1662,14 +1662,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum - if average == 'micro': + tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) tn_sum = np.array([tn_sum.sum()]) fn_sum = np.array([fn_sum.sum()]) neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) - + pred_sum = np.array([pred_sum.sum()]) # Divide, and on zero-division, set scores and/or warn according to # zero_division: fpr = _prf_divide(fp_sum, neg_sum, 'fpr', From 523eaa094051bdd1ef69f72c693648cf79062813 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 16:30:02 -0700 Subject: [PATCH 003/127] add print --- sklearn/metrics/_classification.py | 13 ++++++++----- 1 file 
changed, 8 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 43a246c24f36e..a06d4d5b39ced 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1661,7 +1661,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum - + print('before micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) if average == 'micro': tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) @@ -1670,17 +1670,19 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) + print('after micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) # Divide, and on zero-division, set scores and/or warn according to # zero_division: + print('before divide',tpr,fpr,tnr,fnr) + tpr = _prf_divide(tp_sum, pos_sum, 'tpr', + 'positives', average, warn_for, zero_division) fpr = _prf_divide(fp_sum, neg_sum, 'fpr', 'negatives', average, warn_for, zero_division) tnr = _prf_divide(tn_sum, neg_sum, 'tnr', 'negatives', average, warn_for, zero_division) fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', average, warn_for, zero_division) - tpr = _prf_divide(tp_sum, pos_sum, 'tpr', - 'positives', average, warn_for, zero_division) - + print('after divide',tpr,fpr,tnr,fnr) # Average the results if average == 'weighted': weights = pos_sum @@ -1698,13 +1700,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non weights = sample_weight else: weights = None - + print('before avg', tpr, fpr, tnr, fnr,weights) if average is not None: assert average != 'binary' or len(fpr) == 1 fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) tpr = np.average(tpr, weights=weights) + print('after avg', tpr, fpr, tnr, fnr) return tpr, fpr, tnr, fnr From b9772166aeb75430e20ae2fe17b293acc503d365 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 16:53:37 -0700 Subject: [PATCH 004/127] remove one print --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index a06d4d5b39ced..62de8cbe91597 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1673,7 +1673,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non print('after micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) # Divide, and on zero-division, set scores and/or warn according to # zero_division: - print('before divide',tpr,fpr,tnr,fnr) + #print('before divide',tpr,fpr,tnr,fnr) tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', average, warn_for, zero_division) fpr = _prf_divide(fp_sum, neg_sum, 'fpr', From 5a061ef1f2eb95ff5180852cffd2f71389146c2d Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:20:58 -0700 Subject: [PATCH 005/127] remove print statements --- sklearn/metrics/_classification.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 62de8cbe91597..48f4ae38d6ac0 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1661,7 +1661,6 
@@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum - print('before micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) if average == 'micro': tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) @@ -1670,10 +1669,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) - print('after micro',tn_sum, fp_sum, fn_sum, tp_sum, pred_sum, neg_sum, pos_sum) # Divide, and on zero-division, set scores and/or warn according to # zero_division: - #print('before divide',tpr,fpr,tnr,fnr) tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', average, warn_for, zero_division) fpr = _prf_divide(fp_sum, neg_sum, 'fpr', @@ -1682,7 +1679,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non 'negatives', average, warn_for, zero_division) fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', average, warn_for, zero_division) - print('after divide',tpr,fpr,tnr,fnr) # Average the results if average == 'weighted': weights = pos_sum @@ -1700,14 +1696,12 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non weights = sample_weight else: weights = None - print('before avg', tpr, fpr, tnr, fnr,weights) if average is not None: assert average != 'binary' or len(fpr) == 1 fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) tpr = np.average(tpr, weights=weights) - print('after avg', tpr, fpr, tnr, fnr) return tpr, fpr, tnr, fnr From 64939770f6e61e39580af48e068776a3b85c6e86 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:24:07 -0700 Subject: [PATCH 006/127] add coauthors. 
Co-authored-by: samskruthi padigepati Co-authored-by: Divya Dhar --- sklearn/metrics/_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 48f4ae38d6ac0..b93fa3157e54d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1661,6 +1661,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum+fp_sum pos_sum = fn_sum+tp_sum + if average == 'micro': tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) @@ -1669,6 +1670,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) + # Divide, and on zero-division, set scores and/or warn according to # zero_division: tpr = _prf_divide(tp_sum, pos_sum, 'tpr', @@ -2656,4 +2658,4 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): else: pos_label = y_true.max() y_true = np.array(y_true == pos_label, int) - return np.average((y_true - y_prob) ** 2, weights=sample_weight) + return np.average((y_true - y_prob) ** 2, weights=sample_weight) \ No newline at end of file From 141fa4ae28dc815586ba9336c893bfa7fd813615 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:43:29 -0700 Subject: [PATCH 007/127] fix doc string outputs Co-authored-by: samskruthi padigepati Co-authored-by: Divya Dhar --- sklearn/metrics/_classification.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b93fa3157e54d..8efbd884694f8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1622,19 +1622,18 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') - (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') - (0.33333333333333331, 0.0, 0.0, 0.66666666666666663) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') - (0.22..., 0.33..., 0.26..., None) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) It is possible to compute per-label fpr, fnr, tnr, tpr and supports instead of averaging: >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, ... 
labels=['pig', 'dog', 'cat']) - (array([0, 0, 1]), array([0, 0, 0]), - array([0, 0, 0]), array([1, 1, 0])) + (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) Notes ----- From 9615ae881f790a2fd79cf9790d99e96d4ed830aa Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sat, 2 Nov 2019 17:53:36 -0700 Subject: [PATCH 008/127] pep8 test Co-authored-by: Divya Dhar Co-authored-by: samskruthi padigepati --- sklearn/metrics/_classification.py | 46 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8efbd884694f8..802d0b78630bb 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1504,12 +1504,10 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum -<<<<<<< HEAD -@_deprecate_positional_args -def precision_score(y_true, y_pred, *, labels=None, pos_label=1, -======= -def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=None, - warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None,zero_division="warn"): +def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, + average=None, warn_for=('tpr', 'fpr', + 'tnr', 'fnr'), + sample_weight=None, zero_division="warn"): """Compute TPR, FPR, TNR, FNR for each class The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of @@ -1622,18 +1620,22 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, + 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, + 0.6666666666666666) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) + (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, + 0.6666666666666666) It is possible to compute per-label fpr, fnr, tnr, tpr and supports instead of averaging: >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, ... 
labels=['pig', 'dog', 'cat']) - (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) + (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), + array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) Notes ----- @@ -1645,8 +1647,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non """ _check_zero_division(zero_division) - labels = _check_set_wise_labels(y_true, y_pred, average, labels, - pos_label) + labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum ### samplewise = average == 'samples' @@ -1672,14 +1673,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, average=Non # Divide, and on zero-division, set scores and/or warn according to # zero_division: - tpr = _prf_divide(tp_sum, pos_sum, 'tpr', - 'positives', average, warn_for, zero_division) - fpr = _prf_divide(fp_sum, neg_sum, 'fpr', - 'negatives', average, warn_for, zero_division) - tnr = _prf_divide(tn_sum, neg_sum, 'tnr', - 'negatives', average, warn_for, zero_division) - fnr = _prf_divide(fn_sum, pos_sum, 'fnr', - 'positives', average, warn_for, zero_division) + tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', + average, warn_for, zero_division) + fpr = _prf_divide(fp_sum, neg_sum, 'fpr', 'negatives', + average, warn_for, zero_division) + tnr = _prf_divide(tn_sum, neg_sum, 'tnr', 'negatives', + average, warn_for, zero_division) + fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', + average, warn_for, zero_division) # Average the results if average == 'weighted': weights = pos_sum @@ -2338,7 +2339,8 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) + or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the @@ -2657,4 +2659,4 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): else: pos_label = y_true.max() y_true = np.array(y_true == pos_label, int) - return np.average((y_true - y_prob) ** 2, weights=sample_weight) \ No newline at end of file + return np.average((y_true - y_prob) ** 2, weights=sample_weight) From 79e156215a02fa8fe91d412be602dccbaf64a089 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sun, 3 Nov 2019 01:27:19 -0700 Subject: [PATCH 009/127] trivial Co-authored-by: samskruthi padigepati --- sklearn/metrics/_classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 802d0b78630bb..2433145c6d040 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1654,6 +1654,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, labels=labels, samplewise=samplewise) + tn_sum = MCM[:, 0, 0] fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] From 8f21052163957fa06b188ea20f2f5c1a90ebb5ea Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sun, 3 Nov 2019 03:39:37 -0800 Subject: [PATCH 010/127] remove imported but unused flake8 --- sklearn/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index b9d7ec3bd0a9e..08e0c7844973e 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -153,6 +153,7 @@ 'SCORERS', 'silhouette_samples', 'silhouette_score', + 'tpr_fpr_tnr_fnr_scores', 'v_measure_score', 'zero_one_loss', 'brier_score_loss', From 3ffd83095b67be2df3a7657b98e7692e77603543 Mon Sep 17 00:00:00 2001 From: samskruthi reddy padigepati Date: Sun, 3 Nov 2019 13:28:18 -0800 Subject: [PATCH 011/127] to trigger test --- sklearn/metrics/_classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2433145c6d040..802d0b78630bb 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1654,7 +1654,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, labels=labels, samplewise=samplewise) - tn_sum = MCM[:, 0, 0] fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] From fb73c6e86cd6afc7e39653ce5778259d180601a8 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Mon, 18 May 2020 22:07:46 +0800 Subject: [PATCH 012/127] Take over PR #15522 Modify doc and add deprecation to position arg. 
--- sklearn/metrics/_classification.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 802d0b78630bb..dd95b04fb6d2e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1504,7 +1504,8 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum -def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, +@_deprecate_positional_args +def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, average=None, warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None, zero_division="warn"): @@ -1523,8 +1524,9 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, true positives and ``fn`` the number of false negatives. If ``pos_label is None`` and in binary classification, this function - returns the average precision, recall and F-measure if ``average`` - is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``. + returns the true positive rate, false positive rate, true negative rate + and false negative rate if ``average`` is one of ``'micro'``, ``'macro'``, + ``'weighted'`` or ``'samples'``. Parameters ---------- @@ -1566,8 +1568,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This - alters 'macro' to account for label imbalance; it can result in an - F-score that is not between precision and recall. + alters 'macro' to account for label imbalance. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from @@ -1593,7 +1594,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, tpr : float (if average is not None) or array of float, shape =\ [n_unique_labels] - fpr : float (if average is not None) or array of float, , shape =\ + fpr : float (if average is not None) or array of float, shape =\ [n_unique_labels] tnr : float (if average is not None) or array of float, shape =\ @@ -1616,7 +1617,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, Examples -------- >>> import numpy as np - >>> from sklearn.metrics import precision_recall_fscore_support >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') @@ -1707,7 +1707,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=None, pos_label=1, return tpr, fpr, tnr, fnr -def precision_score(y_true, y_pred, labels=None, pos_label=1, +@_deprecate_positional_args +def precision_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', sample_weight=None, zero_division="warn"): """Compute the precision @@ -2339,8 +2340,7 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) - or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the From c780053a4809f961c40350317ab56a97cb9df377 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Mon, 18 May 2020 23:31:41 +0800 Subject: [PATCH 013/127] Modify doc and zero-division in the weighted average. Add test for binary classification. (Modify some lines to pass flake8) --- sklearn/metrics/_classification.py | 31 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 31 +++++++++++++++++--- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index dd95b04fb6d2e..62669dbd5970d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1509,7 +1509,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, average=None, warn_for=('tpr', 'fpr', 'tnr', 'fnr'), sample_weight=None, zero_division="warn"): - """Compute TPR, FPR, TNR, FNR for each class + """Compute True Positive Rate (TPR), False Positive Rate (FPR),\ + True Negative Rate (TNR), False Negative Rate (FNR) for each class The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of true positives and ``fn`` the number of false negatives. @@ -1583,9 +1584,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, zero_division : "warn", 0 or 1, default="warn" Sets the value to return when there is a zero division: - - recall: when there are no positive labels - - precision: when there are no positive predictions - - f-score: both + - tpr, fnr: when there are no positive labels + - fpr, tnr: when there are no negative labels If set to "warn", this acts as 0, but warnings are also raised. @@ -1649,7 +1649,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) - # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum ### + # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum samplewise = average == 'samples' MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, @@ -1659,8 +1659,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, fn_sum = MCM[:, 1, 0] tp_sum = MCM[:, 1, 1] pred_sum = tp_sum + MCM[:, 0, 1] - neg_sum = tn_sum+fp_sum - pos_sum = fn_sum+tp_sum + neg_sum = tn_sum + fp_sum + pos_sum = fn_sum + tp_sum if average == 'micro': tp_sum = np.array([tp_sum.sum()]) @@ -1686,18 +1686,18 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, weights = pos_sum if weights.sum() == 0: zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 - # precision is zero_division if there are no positive predictions - # recall is zero_division if there are no positive labels - # fscore is zero_division if all labels AND predictions are - # negative - return (zero_division_value if pred_sum.sum() == 0 else 0, - zero_division_value, - zero_division_value if pred_sum.sum() == 0 else 0) + # TPR and FNR is zero_division if there are no positive labels + # FPR and TNR is zero_division if there are no negative labels + return (zero_division_value if pos_sum == 0 else 0, + zero_division_value if neg_sum == 0 else 0, + zero_division_value if neg_sum == 0 else 0, + zero_division_value if pos_sum == 0 else 0) elif average == 'samples': weights = sample_weight else: weights = None + if average is not None: assert average != 'binary' or len(fpr) == 1 fpr = np.average(fpr, 
weights=weights) @@ -2340,7 +2340,8 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) \ + or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1f959d95ce844..d118e72dd0cad 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -42,6 +42,7 @@ from sklearn.metrics import precision_recall_fscore_support from sklearn.metrics import precision_score from sklearn.metrics import recall_score +from sklearn.metrics import tpr_fpr_tnr_fnr_scores from sklearn.metrics import zero_one_loss from sklearn.metrics import brier_score_loss from sklearn.metrics import multilabel_confusion_matrix @@ -213,6 +214,26 @@ def test_precision_recall_f1_score_binary(): (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2) +def test_tpr_fpr_tnr_fnr_scores_binary(): + # Test TPR, FPR, TNR, FNR Score for binary classification task + y_true, y_pred, _ = make_prediction(binary=True) + + # detailed measures for each class + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) + assert_array_almost_equal(tpr, [0.88, 0.68], 2) + assert_array_almost_equal(fpr, [0.32, 0.12], 2) + assert_array_almost_equal(tnr, [0.68, 0.88], 2) + assert_array_almost_equal(fnr, [0.12, 0.32], 2) + + tn, fp, fn, tp = assert_no_warnings( + confusion_matrix, y_true, y_pred + ).ravel() + assert_array_almost_equal(tp / (tp + fn), 0.68, 2) + assert_array_almost_equal(fp / (tn + fp), 0.12, 2) + assert_array_almost_equal(tn / (tn + fp), 0.88, 2) + assert_array_almost_equal(fn / (tp + fn), 0.32, 2) + + @ignore_warnings def test_precision_recall_f_binary_single_class(): # Test precision, recall and F-scores behave with a single positive or @@ -2064,8 +2085,9 @@ def test_hinge_loss_multiclass(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) + assert ( + hinge_loss(y_true, pred_decision) == dummy_hinge_loss + ) def test_hinge_loss_multiclass_missing_labels_with_labels_none(): @@ -2101,8 +2123,9 @@ def test_hinge_loss_multiclass_with_missing_labels(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert (hinge_loss(y_true, pred_decision, labels=labels) == - dummy_hinge_loss) + assert ( + hinge_loss(y_true, pred_decision, labels=labels) == dummy_hinge_loss + ) def test_hinge_loss_multiclass_invariance_lists(): From 408c2dbf67032cee899a565ed66c96bf9ffb923b Mon Sep 17 00:00:00 2001 From: haochunchang Date: Tue, 19 May 2020 22:39:32 +0800 Subject: [PATCH 014/127] Add tests for binary, multiclass and empty prediction. 
--- sklearn/metrics/tests/test_classification.py | 159 ++++++++++++++++--- 1 file changed, 139 insertions(+), 20 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index d118e72dd0cad..4b0dfd9ff6ec1 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -214,26 +214,6 @@ def test_precision_recall_f1_score_binary(): (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2) -def test_tpr_fpr_tnr_fnr_scores_binary(): - # Test TPR, FPR, TNR, FNR Score for binary classification task - y_true, y_pred, _ = make_prediction(binary=True) - - # detailed measures for each class - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) - assert_array_almost_equal(tpr, [0.88, 0.68], 2) - assert_array_almost_equal(fpr, [0.32, 0.12], 2) - assert_array_almost_equal(tnr, [0.68, 0.88], 2) - assert_array_almost_equal(fnr, [0.12, 0.32], 2) - - tn, fp, fn, tp = assert_no_warnings( - confusion_matrix, y_true, y_pred - ).ravel() - assert_array_almost_equal(tp / (tp + fn), 0.68, 2) - assert_array_almost_equal(fp / (tn + fp), 0.12, 2) - assert_array_almost_equal(tn / (tn + fp), 0.88, 2) - assert_array_almost_equal(fn / (tp + fn), 0.32, 2) - - @ignore_warnings def test_precision_recall_f_binary_single_class(): # Test precision, recall and F-scores behave with a single positive or @@ -328,6 +308,145 @@ def test_precision_recall_f_ignored_labels(): recall_all(average=average)) +def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): + # Test TPR, FPR, TNR, FNR Score for binary classification task + y_true, y_pred, _ = make_prediction(binary=True) + + # compute scores with default labels introspection + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average=None + ) + assert_array_almost_equal(tprs, [0.88, 0.68], 2) + assert_array_almost_equal(fprs, [0.32, 0.12], 2) + assert_array_almost_equal(tnrs, [0.68, 0.88], 2) + assert_array_almost_equal(fnrs, [0.12, 0.32], 2) + + tn, fp, fn, tp = assert_no_warnings( + confusion_matrix, y_true, y_pred + ).ravel() + assert_array_almost_equal(tp / (tp + fn), 0.68, 2) + assert_array_almost_equal(fp / (tn + fp), 0.12, 2) + assert_array_almost_equal(tn / (tn + fp), 0.88, 2) + assert_array_almost_equal(fn / (tp + fn), 0.32, 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='macro' + ) + assert tpr == np.mean(tprs) + assert fpr == np.mean(fprs) + assert tnr == np.mean(tnrs) + assert fnr == np.mean(fnrs) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='weighted' + ) + support = np.bincount(y_true) + assert tpr == np.average(tprs, weights=support) + assert fpr == np.average(fprs, weights=support) + assert tnr == np.average(tnrs, weights=support) + assert fnr == np.average(fnrs, weights=support) + + +def test_tpr_fpr_tnr_fnr_scores_multiclass(): + # Test TPR, FPR, TNR, FNR Score for multiclass classification task + y_true, y_pred, _ = make_prediction(binary=False) + + # compute scores with default labels introspection + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average=None + ) + assert_array_almost_equal(tprs, [0.79, 0.1, 0.9], 2) + assert_array_almost_equal(fprs, [0.08, 0.14, 0.45], 2) + assert_array_almost_equal(tnrs, [0.92, 0.86, 0.55], 2) + assert_array_almost_equal(fnrs, [0.21, 0.9, 0.1], 2) + + # averaging tests + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='micro' + ) + assert_array_almost_equal(tpr, 0.53, 2) + 
assert_array_almost_equal(fpr, 0.23, 2) + assert_array_almost_equal(tnr, 0.77, 2) + assert_array_almost_equal(fnr, 0.47, 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='macro' + ) + assert_array_almost_equal(tpr, 0.6, 2) + assert_array_almost_equal(fpr, 0.22, 2) + assert_array_almost_equal(tnr, 0.78, 2) + assert_array_almost_equal(fnr, 0.4, 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, average='weighted' + ) + assert_array_almost_equal(tpr, 0.53, 2) + assert_array_almost_equal(fpr, 0.2, 2) + assert_array_almost_equal(tnr, 0.8, 2) + assert_array_almost_equal(fnr, 0.47, 2) + + with pytest.raises(ValueError): + tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="samples") + + # same prediction but with and explicit label ordering + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, labels=[0, 2, 1], average=None + ) + assert_array_almost_equal(tpr, [0.79, 0.9, 0.1], 2) + assert_array_almost_equal(fpr, [0.08, 0.45, 0.14], 2) + assert_array_almost_equal(tnr, [0.92, 0.55, 0.86], 2) + assert_array_almost_equal(fnr, [0.21, 0.1, 0.9], 2) + + +@ignore_warnings +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): + y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) + y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) + + zero_division = 1.0 if zero_division == 1.0 else 0.0 + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average=None, + zero_division=zero_division) + assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division], 2) + assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) + assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) + assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division], 2) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="macro", + zero_division=zero_division) + assert_almost_equal(tpr, 0.625 if zero_division else 0.375) + assert_almost_equal(fpr, 1 / 3.0 / 4.0) + assert_almost_equal(tnr, 0.91666, 5) + assert_almost_equal(fnr, 0.625 if zero_division else 0.375) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="micro", + zero_division=zero_division) + assert_almost_equal(tpr, 0.5) + assert_almost_equal(fpr, 0.125) + assert_almost_equal(tnr, 0.875) + assert_almost_equal(fnr, 0.5) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="weighted", + zero_division=zero_division) + assert_almost_equal(tpr, 0.5) + assert_almost_equal(fpr, 0) + assert_almost_equal(tnr, 1.0) + assert_almost_equal(fnr, 0.5) + + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + average="samples", + sample_weight=[1, 1, 2], + zero_division=zero_division) + assert_almost_equal(tpr, 0.5) + assert_almost_equal(fpr, 0.08333, 5) + assert_almost_equal(tnr, 0.91666, 5) + assert_almost_equal(fnr, 0.5) + + def test_average_precision_score_score_non_binary_class(): # Test that average_precision_score function returns an error when trying # to compute average_precision_score for multiclass task. From 4adfe2e7b43215a54cbb682c31bd3cadb8d559a8 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Wed, 20 May 2020 09:06:12 +0800 Subject: [PATCH 015/127] Add tpr_fpr_tnr_fnr_scores to test_common.py. 
--- sklearn/metrics/_classification.py | 8 ++++---- sklearn/metrics/tests/test_common.py | 29 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 62669dbd5970d..0ba8596a37b81 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1688,10 +1688,10 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 # TPR and FNR is zero_division if there are no positive labels # FPR and TNR is zero_division if there are no negative labels - return (zero_division_value if pos_sum == 0 else 0, - zero_division_value if neg_sum == 0 else 0, - zero_division_value if neg_sum == 0 else 0, - zero_division_value if pos_sum == 0 else 0) + return (zero_division_value if pos_sum.sum() == 0 else 0, + zero_division_value if neg_sum.sum() == 0 else 0, + zero_division_value if neg_sum.sum() == 0 else 0, + zero_division_value if pos_sum.sum() == 0 else 0) elif average == 'samples': weights = sample_weight diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 7301d21a35f39..c54a984c5556b 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -53,6 +53,7 @@ from sklearn.metrics import recall_score from sklearn.metrics import roc_auc_score from sklearn.metrics import roc_curve +from sklearn.metrics import tpr_fpr_tnr_fnr_scores from sklearn.metrics import zero_one_loss from sklearn.metrics import ndcg_score from sklearn.metrics import dcg_score @@ -140,6 +141,9 @@ "f2_score": partial(fbeta_score, beta=2), "f0.5_score": partial(fbeta_score, beta=0.5), "matthews_corrcoef_score": matthews_corrcoef, + "tpr_fpr_tnr_fnr_scores": tpr_fpr_tnr_fnr_scores, + "binary_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="binary"), "weighted_f0.5_score": partial(fbeta_score, average="weighted", beta=0.5), "weighted_f1_score": partial(f1_score, average="weighted"), @@ -147,6 +151,8 @@ "weighted_precision_score": partial(precision_score, average="weighted"), "weighted_recall_score": partial(recall_score, average="weighted"), "weighted_jaccard_score": partial(jaccard_score, average="weighted"), + "weighted_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="weighted"), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), @@ -154,6 +160,8 @@ "micro_precision_score": partial(precision_score, average="micro"), "micro_recall_score": partial(recall_score, average="micro"), "micro_jaccard_score": partial(jaccard_score, average="micro"), + "micro_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), @@ -161,6 +169,8 @@ "macro_precision_score": partial(precision_score, average="macro"), "macro_recall_score": partial(recall_score, average="macro"), "macro_jaccard_score": partial(jaccard_score, average="macro"), + "macro_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), @@ -168,6 +178,8 @@ "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), 
"samples_jaccard_score": partial(jaccard_score, average="samples"), + "samples_tpr_fpr_tnr_fnr_scores": + partial(tpr_fpr_tnr_fnr_scores, average="samples"), "cohen_kappa_score": cohen_kappa_score, } @@ -264,6 +276,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_precision_score", "samples_recall_score", "samples_jaccard_score", + "samples_tpr_fpr_tnr_fnr_scores", "coverage_error", "unnormalized_multilabel_confusion_matrix_sample", "label_ranking_loss", @@ -282,6 +295,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "roc_auc_score", "weighted_roc_auc", + "tpr_fpr_tnr_fnr_scores", "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -295,6 +309,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f1_score", "f2_score", "f0.5_score", + "binary_tpr_fpr_tnr_fnr_scores", # curves "roc_curve", @@ -326,6 +341,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", "jaccard_score", + "tpr_fpr_tnr_fnr_scores", "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -354,17 +370,21 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", "jaccard_score", + "tpr_fpr_tnr_fnr_scores", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", + "weighted_tpr_fpr_tnr_fnr_scores", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_jaccard_score", + "micro_tpr_fpr_tnr_fnr_scores", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "macro_jaccard_score", + "macro_tpr_fpr_tnr_fnr_scores", "unnormalized_multilabel_confusion_matrix", "unnormalized_multilabel_confusion_matrix_sample", @@ -406,20 +426,24 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", + "weighted_tpr_fpr_tnr_fnr_scores", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "macro_jaccard_score", + "macro_tpr_fpr_tnr_fnr_scores", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_jaccard_score", + "micro_tpr_fpr_tnr_fnr_scores", "unnormalized_multilabel_confusion_matrix", "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", "samples_jaccard_score", + "samples_tpr_fpr_tnr_fnr_scores", } # Regression metrics with "multioutput-continuous" format support @@ -444,6 +468,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): # P = R = F = accuracy in multiclass case "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", + "micro_tpr_fpr_tnr_fnr_scores", "matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error", "median_absolute_error", "max_error", @@ -465,6 +490,10 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "precision_score", "recall_score", "f2_score", "f0.5_score", + "tpr_fpr_tnr_fnr_scores", + "weighted_tpr_fpr_tnr_fnr_scores", + "macro_tpr_fpr_tnr_fnr_scores", + "weighted_f0.5_score", "weighted_f1_score", 
"weighted_f2_score", "weighted_precision_score", "weighted_jaccard_score", "unnormalized_multilabel_confusion_matrix", From 53d6fd2f5c9e9c81a8cc7dd4639be87d4c00ec01 Mon Sep 17 00:00:00 2001 From: haochunchang Date: Wed, 20 May 2020 10:32:52 +0800 Subject: [PATCH 016/127] Remove pred_sum variable --- sklearn/metrics/_classification.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 0ba8596a37b81..8514e932263be 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1649,7 +1649,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) - # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pred_sum, pos_sum, neg_sum + # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pos_sum, neg_sum samplewise = average == 'samples' MCM = multilabel_confusion_matrix(y_true, y_pred, sample_weight=sample_weight, @@ -1658,7 +1658,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] tp_sum = MCM[:, 1, 1] - pred_sum = tp_sum + MCM[:, 0, 1] neg_sum = tn_sum + fp_sum pos_sum = fn_sum + tp_sum @@ -1669,7 +1668,6 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, fn_sum = np.array([fn_sum.sum()]) neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) - pred_sum = np.array([pred_sum.sum()]) # Divide, and on zero-division, set scores and/or warn according to # zero_division: From a5b526234f2842a953a58537e0162b70a7ca6717 Mon Sep 17 00:00:00 2001 From: Hao Chun Chang Date: Tue, 29 Sep 2020 22:16:17 +0800 Subject: [PATCH 017/127] Fix linting --- sklearn/metrics/tests/test_classification.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a312268ff3f6b..38619cfde94d6 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2258,14 +2258,9 @@ def test_hinge_loss_multiclass(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) -<<<<<<< HEAD assert ( hinge_loss(y_true, pred_decision) == dummy_hinge_loss ) -======= - assert (hinge_loss(y_true, pred_decision) == - dummy_hinge_loss) ->>>>>>> 13bccedeb02fa650a247a8ab6420bf9d44df3424 def test_hinge_loss_multiclass_missing_labels_with_labels_none(): From f74fc1066358e0ca83ba6576c16ee21cad65bba7 Mon Sep 17 00:00:00 2001 From: Hao Chun Chang Date: Mon, 5 Oct 2020 22:35:12 +0800 Subject: [PATCH 018/127] Fix parameter documentation --- sklearn/metrics/_classification.py | 36 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 843a4f2e02b80..62eb4b42c189f 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1565,13 +1565,15 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Parameters ---------- - y_true : 1d array-like, or label indicator array / sparse matrix + y_true : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Ground truth (correct) target values. - y_pred : 1d array-like, or label indicator array / sparse matrix + y_pred : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Estimated targets as returned by a classifier. 
- labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -1580,14 +1582,14 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ - 'weighted'] + average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'}, \ + default=None If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -1616,7 +1618,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, sample_weight : array-like of shape (n_samples,), default=None Sample weights. - zero_division : "warn", 0 or 1, default="warn" + zero_division : str or int, {'warn', 0, 1}, default="warn" Sets the value to return when there is a zero division: - tpr, fnr: when there are no positive labels - fpr, tnr: when there are no negative labels @@ -1625,17 +1627,17 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Returns ------- - tpr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + tpr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) - fpr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + fpr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) - tnr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + tnr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) - fnr : float (if average is not None) or array of float, shape =\ - [n_unique_labels] + fnr : float (if average is not None), \ + or ndarray of shape (n_unique_labels,) The number of occurrences of each label in ``y_true``. References @@ -1673,8 +1675,8 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Notes ----- - When ``true positive + false negative == 0``, TPR, FNR are not undefined; - When ``true negative + false positive == 0``, FPR, TNR are not undefined. + When ``true positive + false negative == 0``, TPR, FNR are undefined; + When ``true negative + false positive == 0``, FPR, TNR are undefined. In such cases, by default the metric will be set to 0, as will f-score, and ``UndefinedMetricWarning`` will be raised. This behavior can be modified with ``zero_division``. 
From 44f8b7d789d1d82039f1311059563da96093f288 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 11:13:07 +0100 Subject: [PATCH 019/127] Add specificity_score as a single metric --- sklearn/metrics/_classification.py | 121 ++++++++++++++++++- sklearn/metrics/tests/test_classification.py | 32 +++++ 2 files changed, 152 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 62eb4b42c189f..74edcc069954e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1864,7 +1864,7 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1, @_deprecate_positional_args def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', sample_weight=None, zero_division="warn"): - """Compute the recall. + """Compute the recall, also known as sensitivity or true positive rate. The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of true positives and ``fn`` the number of false negatives. The recall is @@ -1979,6 +1979,125 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', return r +@_deprecate_positional_args +def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, + average='binary', sample_weight=None, zero_division="warn"): + """Compute specificity, also known as true negative rate. + + The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number + of true negatives and ``fp`` is the number of false positives. + The specificity is intuitively the ability of the classifier to find + all the negative samples. + + The best value is 1 and the worst value is 0. + + Parameters + ---------- + y_true : 1d array-like, or label indicator array / sparse matrix + Ground truth (correct) target values. + + y_pred : 1d array-like, or label indicator array / sparse matrix + Estimated targets as returned by a classifier. + + labels : array-like, default=None + The set of labels to include when ``average != 'binary'``, and their + order if ``average is None``. Labels present in the data can be + excluded, for example to calculate a multiclass average ignoring a + majority negative class, while labels not present in the data will + result in 0 components in a macro average. For multilabel targets, + labels are column indices. By default, all labels in ``y_true`` and + ``y_pred`` are used in sorted order. + + pos_label : str or int, default=1 + The class to report if ``average='binary'`` and the data is binary. + If the data are multiclass or multilabel, this will be ignored; + setting ``labels=[pos_label]`` and ``average != 'binary'`` will report + scores for that label only. + + average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'} \ + default='binary' + This parameter is required for multiclass/multilabel targets. + If ``None``, the scores for each class are returned. Otherwise, this + determines the type of averaging performed on the data: + + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average weighted + by support (the number of true instances for each label). 
This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. + + zero_division : "warn", 0 or 1, default="warn" + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. + + Returns + ------- + specificity : float (if average is not None) or array of float of shape + (n_unique_labels,) + Specificity of the positive class in binary classification or weighted + average of the specificity of each class for the multiclass task. + + See Also + -------- + classification_report, precision_recall_fscore_support, recall_score, + balanced_accuracy_score, multilabel_confusion_matrix, tpr_fpr_tnr_fnr_scores + + Notes + ----- + When ``true negative + false positive == 0``, specificity returns 0 and + raises ``UndefinedMetricWarning``. This behavior can be modified with + ``zero_division``. + + References + ---------- + .. [1] `Wikipedia entry for sensitivity and specificity + `_. + + Examples + -------- + >>> from sklearn.metrics import specificity_score + >>> y_true = [0, 1, 2, 0, 1, 2] + >>> y_pred = [0, 2, 1, 0, 0, 1] + >>> specificity_score(y_true, y_pred, average='macro') + 0.66... + >>> specificity_score(y_true, y_pred, average='micro') + 0.66... + >>> specificity_score(y_true, y_pred, average='weighted') + 0.66... + >>> specificity_score(y_true, y_pred, average=None) + array([0.75, 0.5 , 0.75]) + >>> y_true = [0, 0, 0, 0, 0, 0] + >>> specificity_score(y_true, y_pred, average=None) + array([0. , 0.66666667, 0.83333333]) + >>> specificity_score(y_true, y_pred, average=None, zero_division=1) + array([1. 
, 0.66666667, 0.83333333]) + """ + _, _, tnr, _ = tpr_fpr_tnr_fnr_scores(y_true, y_pred, + labels=labels, + pos_label=pos_label, + average=average, + warn_for=('tnr',), + sample_weight=sample_weight, + zero_division=zero_division) + return tnr + + @_deprecate_positional_args def balanced_accuracy_score(y_true, y_pred, *, sample_weight=None, adjusted=False): diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 38619cfde94d6..2696b9e67a13f 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -43,6 +43,7 @@ from sklearn.metrics import precision_score from sklearn.metrics import recall_score from sklearn.metrics import tpr_fpr_tnr_fnr_scores +from sklearn.metrics import specificity_score from sklearn.metrics import zero_one_loss from sklearn.metrics import brier_score_loss from sklearn.metrics import multilabel_confusion_matrix @@ -1048,6 +1049,8 @@ def test_zero_precision_recall(): 0.0, 2) assert_almost_equal(f1_score(y_true, y_pred, average='macro'), 0.0, 2) + assert_almost_equal(specificity_score(y_true, y_pred, average='macro'), + 0.0, 2) finally: np.seterr(**old_error_settings) @@ -2102,6 +2105,35 @@ def test_fscore_warnings(zero_division): assert len(record) == 0 +@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +def test_specificity_warnings(zero_division): + assert_no_warnings(specificity_score, + np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), + average='micro', zero_division=zero_division) + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter('always') + specificity_score(np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), + average='micro', zero_division=zero_division) + if zero_division == "warn": + assert (str(record.pop().message) == + 'Tnr is ill-defined and ' + 'being set to 0.0 due to no true samples.' + ' Use `zero_division` parameter to control' + ' this behavior.') + else: + assert len(record) == 0 + + specificity_score([1, 1], [1, 1]) + if zero_division == "warn": + assert (str(record.pop().message) == + 'Tnr is ill-defined and ' + 'being set to 0.0 due to no true samples.' + ' Use `zero_division` parameter to control' + ' this behavior.') + + def test_prf_average_binary_data_non_binary(): # Error if user does not explicitly set non-binary average mode y_true_mc = [1, 2, 3, 3] From 56132a3fa09e72c7ce6f213d141689ddfa8e8cf6 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 11:26:09 +0100 Subject: [PATCH 020/127] Update __init__.py --- sklearn/metrics/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 0215acbe5b74c..847b5234f2a8f 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -15,6 +15,7 @@ from ._ranking import precision_recall_curve from ._ranking import roc_auc_score from ._ranking import roc_curve +from ._ranking import top_k_accuracy_score from ._classification import accuracy_score from ._classification import balanced_accuracy_score @@ -35,10 +36,13 @@ from ._classification import brier_score_loss from ._classification import multilabel_confusion_matrix from ._classification import tpr_fpr_tnr_fnr_scores +from ._classification import specificity_score from . 
import cluster from .cluster import adjusted_mutual_info_score from .cluster import adjusted_rand_score +from .cluster import rand_score +from .cluster import pair_confusion_matrix from .cluster import completeness_score from .cluster import consensus_score from .cluster import homogeneity_completeness_v_measure @@ -67,6 +71,7 @@ from ._regression import mean_squared_log_error from ._regression import median_absolute_error from ._regression import mean_absolute_percentage_error +from ._regression import mean_pinball_loss from ._regression import r2_score from ._regression import mean_tweedie_deviance from ._regression import mean_poisson_deviance @@ -131,6 +136,7 @@ 'mean_absolute_error', 'mean_squared_error', 'mean_squared_log_error', + 'mean_pinball_loss', 'mean_poisson_deviance', 'mean_gamma_deviance', 'mean_tweedie_deviance', @@ -140,6 +146,7 @@ 'mutual_info_score', 'ndcg_score', 'normalized_mutual_info_score', + 'pair_confusion_matrix', 'pairwise_distances', 'pairwise_distances_argmin', 'pairwise_distances_argmin_min', @@ -154,6 +161,7 @@ 'precision_recall_fscore_support', 'precision_score', 'r2_score', + 'rand_score', 'recall_score', 'RocCurveDisplay', 'roc_auc_score', @@ -161,7 +169,9 @@ 'SCORERS', 'silhouette_samples', 'silhouette_score', + 'top_k_accuracy_score', 'tpr_fpr_tnr_fnr_scores', + 'specificity_score', 'v_measure_score', 'zero_one_loss', 'brier_score_loss', From 995a65c09da96d2812039eabb15909bae6ec21a9 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 12:14:19 +0100 Subject: [PATCH 021/127] Fix linting, add what's new entry --- doc/whats_new/v0.24.rst | 581 +++++++++++++++++++++++------ doc/whats_new/v1.0.rst | 213 +++++++++++ sklearn/metrics/_classification.py | 6 +- 3 files changed, 688 insertions(+), 112 deletions(-) create mode 100644 doc/whats_new/v1.0.rst diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 6491d043b8900..6f2584dccdd10 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -2,13 +2,97 @@ .. currentmodule:: sklearn +.. _changes_0_24_2: + +Version 0.24.2 +============== + +**TBD 2021** + +Changelog +--------- + +:mod:`sklearn.ensemble` +....................... + +- |Fix| Fixed a bug in :class:`ensemble.HistGradientBoostingRegressor` `fit` + with `sample_weight` parameter and `least_absolute_deviation` loss function. + :pr:`19407` by :user:`Vadim Ushtanit `. + +:mod:`sklearn.linear_model` +........................... + +- |Fix|: Fixed a bug in :class:`linear_model.LogisticRegression`: the + sample_weight object is not modified anymore. :pr:`19182` by + :user:`Yosuke KOBAYASHI `. + +:mod:`sklearn.preprocessing` +............................ + +- |Fix| Validate the constructor parameter `handle_unknown` in + :class:`preprocessing.OrdinalEncoder` to only allow for `'error'` and + `'use_encoded_value'` strategies. + :pr:`19234` by `Guillaume Lemaitre `. + +:mod:`sklearn.semi_supervised` +.............................. + +- |Fix| Avoid NaN during label propagation in + :class:`~sklearn.semi_supervised.LabelPropagation`. + :pr:`19271` by :user:`Zhaowei Wang `. + +:mod:`sklearn.utils` +.................... + +- |Fix| Better contains the CSS provided by :func:`utils.estimator_html_repr` + by giving CSS ids to the html representation. :pr:`19417` by `Thomas Fan`_. + +.. _changes_0_24_1: + +Version 0.24.1 +============== + +**January 2021** + +Packaging +--------- + +The 0.24.0 scikit-learn wheels were not working with MacOS <1.15 due to +`libomp`. 
The version of `libomp` used to build the wheels was too recent for +older macOS versions. This issue has been fixed for 0.24.1 scikit-learn wheels. +Scikit-learn wheels published on PyPI.org now officially support macOS 10.13 +and later. + +Changelog +--------- + +:mod:`sklearn.metrics` +...................... + +- |Fix| Fix numerical stability bug that could happen in + :func:`metrics.adjusted_mutual_info_score` and + :func:`metrics.mutual_info_score` with NumPy 1.20+. + :pr:`19179` by `Thomas Fan`_. + +:mod:`sklearn.semi_supervised` +.............................. + +- |Fix| :class:`semi_supervised.SelfTrainingClassifier` is now accepting + meta-estimator (e.g. :class:`ensemble.StackingClassifier`). The validation + of this estimator is done on the fitted estimator, once we know the existence + of the method `predict_proba`. + :pr:`19126` by :user:`Guillaume Lemaitre `. + .. _changes_0_24: Version 0.24.0 ============== -**In Development** +**December 2020** +For a short description of the main highlights of the release, please +refer to +:ref:`sphx_glr_auto_examples_release_highlights_plot_release_highlights_0_24_0.py`. .. include:: changelog_legend.inc @@ -26,25 +110,26 @@ random sampling procedures. between 32-bits and 64-bits data when the kernel has small positive eigenvalues. +- |Fix| :class:`decomposition.TruncatedSVD` becomes deterministic by exposing + a `random_state` parameter. + +- |Fix| :class:`linear_model.Perceptron` when `penalty='elasticnet'`. + Details are listed in the changelog below. (While we are trying to better inform users by providing this information, we cannot assure that this list is complete.) - Changelog --------- -.. - Entries should be grouped by module (in alphabetic order) and prefixed with - one of the labels: |MajorFeature|, |Feature|, |Efficiency|, |Enhancement|, - |Fix| or |API| (see whats_new.rst for descriptions). - Entries should be ordered by those labels (e.g. |Fix| after |Efficiency|). - Changes not specific to a module should be listed under *Multiple Modules* - or *Miscellaneous*. - Entries should end with: - :pr:`123456` by :user:`Joe Bloggs `. - where 123456 is the *pull request* number, not the issue number. +:mod:`sklearn.base` +................... + +- |Fix| :meth:`base.BaseEstimator.get_params` now will raise an + `AttributeError` if a parameter cannot be retrieved as + an instance attribute. Previously it would return `None`. + :pr:`17448` by :user:`Juan Carlos Alfaro Jiménez `. :mod:`sklearn.calibration` .......................... @@ -58,6 +143,14 @@ Changelog sparse matrix or dataframe at the start. :pr:`17546` by :user:`Lucy Liu `. +- |Enhancement| Add `ensemble` parameter to + :class:`calibration.CalibratedClassifierCV`, which enables implementation + of calibration via an ensemble of calibrators (current method) or + just one calibrator using all the data (similar to the built-in feature of + :mod:`sklearn.svm` estimators with the `probabilities=True` parameter). + :pr:`17856` by :user:`Lucy Liu ` and + :user:`Andrea Esuli `. + :mod:`sklearn.cluster` ...................... @@ -75,31 +168,60 @@ Changelog When set to `True`, additional messages will be displayed which can aid with debugging. :pr:`18052` by :user:`Sean O. Stalley `. +- |Enhancement| Added :func:`cluster.kmeans_plusplus` as public function. + Initialization by KMeans++ can now be called separately to generate + initial cluster centroids. 
:pr:`17937` by :user:`g-walsh` + - |API| :class:`cluster.MiniBatchKMeans` attributes, `counts_` and - `init_size_`, are deprecated and will be removed in 0.26. :pr:`17864` by - :user:`Jérémie du Boisberranger `. + `init_size_`, are deprecated and will be removed in 1.1 (renaming of 0.26). + :pr:`17864` by :user:`Jérémie du Boisberranger `. :mod:`sklearn.compose` ...................... -- |FIX| :class:`compose.ColumnTransformer` will skip transformers the +- |Fix| :class:`compose.ColumnTransformer` will skip transformers the column selector is a list of bools that are False. :pr:`17616` by `Thomas Fan`_. -- |FIX| :class:`compose.ColumnTransformer` now displays the remainder in the +- |Fix| :class:`compose.ColumnTransformer` now displays the remainder in the diagram display. :pr:`18167` by `Thomas Fan`_. +- |Fix| :class:`compose.ColumnTransformer` enforces strict count and order + of column names between `fit` and `transform` by raising an error instead + of a warning, following the deprecation cycle. + :pr:`18256` by :user:`Madhura Jayratne `. + :mod:`sklearn.covariance` ......................... -- |API| Deprecates `cv_alphas_` in favor of `cv_results['alphas']` and +- |API| Deprecates `cv_alphas_` in favor of `cv_results_['alphas']` and `grid_scores_` in favor of split scores in `cv_results_` in :class:`covariance.GraphicalLassoCV`. `cv_alphas_` and `grid_scores_` will be - removed in version 0.26. :pr:`16392` by `Thomas Fan`_. + removed in version 1.1 (renaming of 0.26). + :pr:`16392` by `Thomas Fan`_. :mod:`sklearn.cross_decomposition` .................................. +- |Fix| Fixed a bug in :class:`cross_decomposition.PLSSVD` which would + sometimes return components in the reversed order of importance. + :pr:`17095` by `Nicolas Hug`_. + +- |Fix| Fixed a bug in :class:`cross_decomposition.PLSSVD`, + :class:`cross_decomposition.CCA`, and + :class:`cross_decomposition.PLSCanonical`, which would lead to incorrect + predictions for `est.transform(Y)` when the training data is single-target. + :pr:`17095` by `Nicolas Hug`_. + +- |Fix| Increases the stability of :class:`cross_decomposition.CCA` :pr:`18746` + by `Thomas Fan`_. + +- |API| For :class:`cross_decomposition.NMF`, + the `init` value, when 'init=None' and + n_components <= min(n_samples, n_features) will be changed from + `'nndsvd'` to `'nndsvda'` in 1.1 (renaming of 0.26). + :pr:`18525` by :user:`Chiara Marmo `. + - |API| The bounds of the `n_components` parameter is now restricted: - into `[1, min(n_samples, n_features, n_targets)]`, for @@ -107,37 +229,43 @@ Changelog and :class:`cross_decomposition.PLSCanonical`. - into `[1, n_features]` or :class:`cross_decomposition.PLSRegression`. - An error will be raised in 0.26. :pr:`17095` by `Nicolas Hug`_. + An error will be raised in 1.1 (renaming of 0.26). + :pr:`17095` by `Nicolas Hug`_. - |API| For :class:`cross_decomposition.PLSSVD`, :class:`cross_decomposition.CCA`, and :class:`cross_decomposition.PLSCanonical`, the `x_scores_` and `y_scores_` - attributes were deprecated and will be removed in 0.26. They can be - retrieved by calling `transform` on the training data. The `norm_y_weights` - attribute will also be removed. :pr:`17095` by `Nicolas Hug`_. - -- |Fix| Fixed a bug in :class:`cross_decomposition.PLSSVD` which would - sometimes return components in the reversed order of importance. + attributes were deprecated and will be removed in 1.1 (renaming of 0.26). + They can be retrieved by calling `transform` on the training data. 
+ The `norm_y_weights` attribute will also be removed. :pr:`17095` by `Nicolas Hug`_. -- |Fix| Fixed a bug in :class:`cross_decomposition.PLSSVD`, +- |API| For :class:`cross_decomposition.PLSRegression`, + :class:`cross_decomposition.PLSCanonical`, :class:`cross_decomposition.CCA`, and - :class:`cross_decomposition.PLSCanonical`, which would lead to incorrect - predictions for `est.transform(Y)` when the training data is single-target. - :pr:`17095` by `Nicolas Hug`_. + :class:`cross_decomposition.PLSSVD`, the `x_mean_`, `y_mean_`, `x_std_`, and + `y_std_` attributes were deprecated and will be removed in 1.1 + (renaming of 0.26). + :pr:`18768` by :user:`Maren Westermann `. + +- |Fix| :class:`decomposition.TruncatedSVD` becomes deterministic by using the + `random_state`. It controls the weights' initialization of the underlying + ARPACK solver. + :pr:` #18302` by :user:`Gaurav Desai ` and + :user:`Ivan Panico `. :mod:`sklearn.datasets` ....................... +- |Feature| :func:`datasets.fetch_openml` now validates md5 checksum of arff + files downloaded or cached to ensure data integrity. + :pr:`14800` by :user:`Shashank Singh ` and `Joel Nothman`_. + - |Enhancement| :func:`datasets.fetch_openml` now allows argument `as_frame` to be 'auto', which tries to convert returned data to pandas DataFrame unless data is sparse. :pr:`17396` by :user:`Jiaxiang `. -- |Feature| :func:`datasets.fetch_openml` now validates md5checksum of arff - files downloaded or cached to ensure data integrity. - :pr:`14800` by :user:`Shashank Singh ` and `Joel Nothman`_. - - |Enhancement| :func:`datasets.fetch_covtype` now now supports the optional argument `as_frame`; when it is set to True, the returned Bunch object's `data` and `frame` members are pandas DataFrames, and the `target` member is @@ -151,6 +279,11 @@ Changelog :pr:`18280` by :user:`Alex Liang ` and `Guillaume Lemaitre`_. +- |Enhancement| :func:`datasets.fetch_20newsgroups_vectorized` now supports + loading as a pandas ``DataFrame`` by setting ``as_frame=True``. + :pr:`17499` by :user:`Brigitta Sipőcz ` and + `Guillaume Lemaitre`_. + - |API| The default value of `as_frame` in :func:`datasets.fetch_openml` is changed from False to 'auto'. :pr:`17610` by :user:`Jiaxiang `. @@ -158,6 +291,16 @@ Changelog :mod:`sklearn.decomposition` ............................ +- |Enhancement| :func:`decomposition.FactorAnalysis` now supports the optional + argument `rotation`, which can take the value `None`, `'varimax'` or + `'quartimax'`. :pr:`11064` by :user:`Jona Sassenhagen `. + +- |Enhancement| :class:`decomposition.NMF` now supports the optional parameter + `regularization`, which can take the values `None`, 'components', + 'transformation' or 'both', in accordance with + :func:`decomposition.NMF.non_negative_factorization`. + :pr:`17414` by :user:`Bharat Raghunathan `. + - |Fix| :class:`decomposition.KernelPCA` behaviour is now more consistent between 32-bits and 64-bits data input when the kernel has small positive eigenvalues. Small positive eigenvalues were not correctly discarded for @@ -166,30 +309,31 @@ Changelog - |Fix| Fix :class:`decomposition.SparseCoder` such that it follows scikit-learn API and support cloning. The attribute `components_` is - deprecated in 0.24 and will be removed in 0.26. This attribute was - redundant with the `dictionary` attribute and constructor parameter. + deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). + This attribute was redundant with the `dictionary` attribute and constructor + parameter. 
:pr:`17679` by :user:`Xavier Dupré `. -- |Enhancement| :func:`decomposition.FactorAnalysis` now supports the optional - argument `rotation`, which can take the value `None`, `'varimax'` or - `'quartimax'.` :pr:`11064` by :user:`Jona Sassenhagen `. - -- |Enhancement| :class:`decomposition.NMF` now supports the optional parameter - `regularization`, which can take the values `None`, `components`, - `transformation` or `both`, in accordance with - :func:`decomposition.NMF.non_negative_factorization`. - :pr:`17414` by :user:`Bharat Raghunathan `. +- |Fix| :meth:`TruncatedSVD.fit_transform` consistently returns the same + as :meth:`TruncatedSVD.fit` followed by :meth:`TruncatedSVD.transform`. + :pr:`18528` by :user:`Albert Villanova del Moral ` and + :user:`Ruifeng Zheng `. :mod:`sklearn.discriminant_analysis` .................................... - |Enhancement| :class:`discriminant_analysis.LinearDiscriminantAnalysis` can now use custom covariance estimate by setting the `covariance_estimator` - parameter. :pr:`14446` by :user:`Hugo Richard ` + parameter. :pr:`14446` by :user:`Hugo Richard `. :mod:`sklearn.ensemble` ....................... +- |MajorFeature| :class:`ensemble.HistGradientBoostingRegressor` and + :class:`ensemble.HistGradientBoostingClassifier` now have native + support for categorical features with the `categorical_features` + parameter. :pr:`18394` by `Nicolas Hug`_ and `Thomas Fan`_. + - |Feature| :class:`ensemble.HistGradientBoostingRegressor` and :class:`ensemble.HistGradientBoostingClassifier` now support the method `staged_predict`, which allows monitoring of each stage. @@ -209,26 +353,27 @@ Changelog :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and :user:`Egor Smirnov `. -- |API|: The parameter ``n_classes_`` is now deprecated in +- |Fix| Fixed a bug in + :class:`ensemble.HistGradientBoostingRegressor` and + :class:`ensemble.HistGradientBoostingClassifier` which can now accept data + with `uint8` dtype in `predict`. :pr:`18410` by `Nicolas Hug`_. + +- |API| The parameter ``n_classes_`` is now deprecated in :class:`ensemble.GradientBoostingRegressor` and returns `1`. :pr:`17702` by :user:`Simona Maggio `. -- |API|: Mean absolute error ('mae') is now deprecated for the parameter +- |API| Mean absolute error ('mae') is now deprecated for the parameter ``criterion`` in :class:`ensemble.GradientBoostingRegressor` and :class:`ensemble.GradientBoostingClassifier`. :pr:`18326` by :user:`Madhura Jayaratne `. -- |Fix|: Fixed a bug in - :class:`ensemble.HistGradientBoostingRegressor` and - :class:`ensemble.HistGradientBoostingClassifier` which can now accept data - with `uint8` dtype in `predict`. :pr:`18410` by `Nicolas Hug`_. - :mod:`sklearn.exceptions` ......................... - |API| :class:`exceptions.ChangedBehaviorWarning` and :class:`exceptions.NonBLASDotWarning` are deprecated and will be removed in - v0.26, :pr:`17804` by `Adrin Jalali`_. + 1.1 (renaming of 0.26). + :pr:`17804` by `Adrin Jalali`_. :mod:`sklearn.feature_extraction` ................................. @@ -237,6 +382,11 @@ Changelog values for one categorical feature. :pr:`17367` by :user:`Peng Yu ` and :user:`Chiara Marmo `. +- |Fix| :class:`feature_extraction.CountVectorizer` raises an issue if a + custom token pattern which capture more than one group is provided. + :pr:`15427` by :user:`Gangesh Gudmalwar ` and + :user:`Erin R Hoffman `. + :mod:`sklearn.feature_selection` ................................ 
@@ -250,18 +400,18 @@ Changelog attribute name/path or a `callable` for extracting feature importance from the estimator. :pr:`15361` by :user:`Venkatachalam N `. -- |Enhancement| :class:`feature_selection.RFE` supports the option for the - number of `n_features_to_select` to be given as a float representing the - percentage of features to select. - :pr:`17090` by :user:`Lisa Schwetlick ` and - :user:`Marija Vlajic Wheeler `. - - |Efficiency| Reduce memory footprint in :func:`feature_selection.mutual_info_classif` and :func:`feature_selection.mutual_info_regression` by calling :class:`neighbors.KDTree` for counting nearest neighbors. :pr:`17878` by :user:`Noel Rogers `. +- |Enhancement| :class:`feature_selection.RFE` supports the option for the + number of `n_features_to_select` to be given as a float representing the + percentage of features to select. + :pr:`17090` by :user:`Lisa Schwetlick ` and + :user:`Marija Vlajic Wheeler `. + :mod:`sklearn.gaussian_process` ............................... @@ -274,12 +424,6 @@ Changelog :mod:`sklearn.impute` ..................... -- |Fix| replace the default values in :class:`impute.IterativeImputer` - of `min_value` and `max_value` parameters to `-np.inf` and `np.inf`, - respectively instead of `None`. However, the behaviour of the class does not - change since `None` was defaulting to these values already. - :pr:`16493` by :user:`Darshan N `. - - |Feature| :class:`impute.SimpleImputer` now supports a list of strings when ``strategy='most_frequent'`` or ``strategy='constant'``. :pr:`17526` by :user:`Ayako YAGI ` and @@ -289,10 +433,20 @@ Changelog revert imputed data to original when instantiated with ``add_indicator=True``. :pr:`17612` by :user:`Srimukh Sripada `. +- |Fix| replace the default values in :class:`impute.IterativeImputer` + of `min_value` and `max_value` parameters to `-np.inf` and `np.inf`, + respectively instead of `None`. However, the behaviour of the class does not + change since `None` was defaulting to these values already. + :pr:`16493` by :user:`Darshan N `. + - |Fix| :class:`impute.IterativeImputer` will not attempt to set the estimator's `random_state` attribute, allowing to use it with more external classes. :pr:`15636` by :user:`David Cortes `. +- |Efficiency| :class:`impute.SimpleImputer` is now faster with `object` dtype array. + when `strategy='most_frequent'` in :class:`~sklearn.impute.SimpleImputer`. + :pr:`18987` by :user:`David Katz `. + :mod:`sklearn.inspection` ......................... @@ -306,6 +460,11 @@ Changelog :func:`inspection.permutation_importance`. :pr:`16906` by :user:`Roei Kahny `. +- |API| Positional arguments are deprecated in + :meth:`inspection.PartialDependenceDisplay.plot` and will error in 1.1 + (renaming of 0.26). + :pr:`18293` by `Thomas Fan`_. + :mod:`sklearn.isotonic` ....................... @@ -318,6 +477,10 @@ Changelog - |Enhancement| :class:`isotonic.IsotonicRegression` now accepts 2d array with 1 feature as input array. :pr:`17379` by :user:`Jiaxiang `. +- |Fix| Add tolerance when determining duplicate X values to prevent + inf values from being predicted by :class:`isotonic.IsotonicRegression`. + :pr:`18639` by :user:`Lucy Liu `. + :mod:`sklearn.kernel_approximation` ................................... @@ -326,6 +489,9 @@ Changelog map approximation. :pr:`13003` by :user:`Daniel López Sánchez `. +- |Efficiency| :class:`kernel_approximation.Nystroem` now supports + parallelization via `joblib.Parallel` using argument `n_jobs`. + :pr:`18545` by :user:`Laurenz Reitsam `. 
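To illustrate the ``n_jobs`` entry above, a minimal sketch (parameter values are placeholders, not recommendations)::

    from sklearn.datasets import make_classification
    from sklearn.kernel_approximation import Nystroem

    X, _ = make_classification(n_samples=500, n_features=20, random_state=0)
    # The kernel map approximation can now be computed on several cores.
    feature_map = Nystroem(kernel="rbf", gamma=0.2, n_components=100,
                           random_state=0, n_jobs=2)
    print(feature_map.fit_transform(X).shape)  # (500, 100)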
:mod:`sklearn.linear_model` ........................... @@ -341,26 +507,50 @@ Changelog efficient leave-one-out cross-validation scheme ``cv=None``. :pr:`6624` by :user:`Marijn van Vliet `. +- |Fix| Fixes bug in :class:`linear_model.TheilSenRegressor` where + `predict` and `score` would fail when `fit_intercept=False` and there was + one feature during fitting. :pr:`18121` by `Thomas Fan`_. + +- |Fix| Fixes bug in :class:`linear_model.ARDRegression` where `predict` + was raising an error when `normalize=True` and `return_std=True` because + `X_offset_` and `X_scale_` were undefined. + :pr:`18607` by :user:`fhaselbeck `. + +- |Fix| Added the missing `l1_ratio` parameter in + :class:`linear_model.Perceptron`, to be used when `penalty='elasticnet'`. + This changes the default from 0 to 0.15. :pr:`18622` by + :user:`Haesun Park `. :mod:`sklearn.manifold` ....................... -- |Enhancement| Add `square_distances` parameter to :class:`manifold.TSNE`, - which provides backward compatibility during deprecation of legacy squaring - behavior. Distances will be squared by default in 0.26, and this parameter - will be removed in 0.28. :pr:`17662` by - :user:`Joshua Newton `. - - |Efficiency| Fixed :issue:`10493`. Improve Local Linear Embedding (LLE) that raised `MemoryError` exception when used with large inputs. :pr:`17997` by :user:`Bertrand Maisonneuve `. +- |Enhancement| Add `square_distances` parameter to :class:`manifold.TSNE`, + which provides backward compatibility during deprecation of legacy squaring + behavior. Distances will be squared by default in 1.1 (renaming of 0.26), + and this parameter will be removed in 1.3. :pr:`17662` by + :user:`Joshua Newton `. + - |Fix| :class:`manifold.MDS` now correctly sets its `_pairwise` attribute. :pr:`18278` by `Thomas Fan`_. :mod:`sklearn.metrics` ...................... +- |Feature| Added :func:`metrics.cluster.pair_confusion_matrix` implementing + the confusion matrix arising from pairs of elements from two clusterings. + :pr:`17412` by :user:`Uwe F Mayer `. + +- |Feature| new metric :func:`metrics.top_k_accuracy_score`. It's a + generalization of :func:`metrics.top_k_accuracy_score`, the difference is + that a prediction is considered correct as long as the true label is + associated with one of the `k` highest predicted scores. + :func:`accuracy_score` is the special case of `k = 1`. + :pr:`16625` by :user:`Geoffrey Bolmier `. + - |Feature| Added :func:`metrics.det_curve` to compute Detection Error Tradeoff curve classification metric. :pr:`10591` by :user:`Jeremy Karnowski ` and @@ -376,10 +566,17 @@ Changelog some practical test cases were taken from PR :pr:`10711` by :user:`Mohamed Ali Jamaoui `. -- |Fix| Fixed a bug in - :func:`metrics.classification_report` which was raising AttributeError - when called with `output_dict=True` for 0-length values. - :pr:`17777` by :user:`Shubhanshu Mishra `. +- |Feature| Added :func:`metrics.rand_score` implementing the (unadjusted) + Rand index. + :pr:`17412` by :user:`Uwe F Mayer `. + +- |Feature| :func:`metrics.plot_confusion_matrix` now supports making colorbar + optional in the matplotlib plot by setting `colorbar=False`. :pr:`17192` by + :user:`Avi Gupta ` + +- |Feature| :func:`metrics.plot_confusion_matrix` now supports making colorbar + optional in the matplotlib plot by setting colorbar=False. :pr:`17192` by + :user:`Avi Gupta `. - |Enhancement| Add `sample_weight` parameter to :func:`metrics.median_absolute_error`. 
:pr:`17225` by @@ -390,15 +587,21 @@ Changelog class to be used when computing the precision and recall statistics. :pr:`17569` by :user:`Guillaume Lemaitre `. -- |Feature| :func:`metrics.plot_confusion_matrix` now supports making colorbar - optional in the matplotlib plot by setting colorbar=False. :pr:`17192` by - :user:`Avi Gupta ` - - |Enhancement| Add `pos_label` parameter in :func:`metrics.plot_roc_curve` in order to specify the positive class to be used when computing the roc auc statistics. :pr:`17651` by :user:`Clara Matos `. +- |Fix| Fixed a bug in + :func:`metrics.classification_report` which was raising AttributeError + when called with `output_dict=True` for 0-length values. + :pr:`17777` by :user:`Shubhanshu Mishra `. + +- |Fix| Fixed a bug in + :func:`metrics.classification_report` which was raising AttributeError + when called with `output_dict=True` for 0-length values. + :pr:`17777` by :user:`Shubhanshu Mishra `. + - |Fix| Fixed a bug in :func:`metrics.jaccard_score` which recommended the `zero_division` parameter when called with no true or predicted samples. @@ -410,27 +613,58 @@ Changelog ``labels`` parameter. :pr:`17935` by :user:`Cary Goltermann `. +- |Fix| Fix scorers that accept a pos_label parameter and compute their metrics + from values returned by `decision_function` or `predict_proba`. Previously, + they would return erroneous values when pos_label was not corresponding to + `classifier.classes_[1]`. This is especially important when training + classifiers directly with string labeled target classes. + :pr:`18114` by :user:`Guillaume Lemaitre `. + +- |Fix| Fixed bug in :func:`metrics.plot_confusion_matrix` where error occurs + when `y_true` contains labels that were not previously seen by the classifier + while the `labels` and `display_labels` parameters are set to `None`. + :pr:`18405` by :user:`Thomas J. Fan ` and + :user:`Yakov Pchelintsev `. + :mod:`sklearn.model_selection` .............................. +- |MajorFeature| Added (experimental) parameter search estimators + :class:`model_selection.HalvingRandomSearchCV` and + :class:`model_selection.HalvingGridSearchCV` which implement Successive + Halving, and can be used as a drop-in replacements for + :class:`model_selection.RandomizedSearchCV` and + :class:`model_selection.GridSearchCV`. :pr:`13900` by `Nicolas Hug`_, `Joel + Nothman`_ and `Andreas Müller`_. + +- |Feature| :class:`model_selection.RandomizedSearchCV` and + :class:`model_selection.GridSearchCV` now have the method ``score_samples`` + :pr:`17478` by :user:`Teon Brooks ` and + :user:`Mohamed Maskani `. + - |Enhancement| :class:`model_selection.TimeSeriesSplit` has two new keyword arguments `test_size` and `gap`. `test_size` allows the out-of-sample time series length to be fixed for all folds. `gap` removes a fixed number of samples between the train and test set on each fold. :pr:`13204` by :user:`Kyle Kosic `. -- |Feature| :class:`model_selection.RandomizedSearchCV` and - :class:`model_selection.GridSearchCV` now have the method, ``score_samples`` - :pr:`17478` by :user:`Teon Brooks ` and - :user:`Mohamed Maskani `. +- |Enhancement| :func:`model_selection.permutation_test_score` and + :func:`model_selection.validation_curve` now accept fit_params + to pass additional estimator parameters. + :pr:`18527` by :user:`Gaurav Dhingra `, + :user:`Julien Jerphanion ` and :user:`Amanda Dsouza `. 
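A short sketch of the ``fit_params`` pass-through described in the entry above (the weights are dummies chosen only to show the plumbing)::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import validation_curve

    X, y = make_classification(n_samples=200, random_state=0)
    sample_weight = np.ones(len(y))  # placeholder weights

    train_scores, test_scores = validation_curve(
        LogisticRegression(max_iter=1000), X, y,
        param_name="C", param_range=[0.1, 1.0, 10.0],
        fit_params={"sample_weight": sample_weight},
    )
    print(train_scores.shape)  # (3, 5): one row per C, one column per CV fold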
-- |Feature| Added (experimental) parameter search estimators - :class:`model_selection.HalvingRandomSearchCV` and - :class:`model_selection.HalvingGridSearchCV` which implement Successive - Halving, and can be used as a drop-in replacements for - :class:`model_selection.RandomizedSearchCV` and - :class:`model_selection.GridSearchCV`. :pr:`13900` by `Nicolas Hug`_, `Joel - Nothman`_ and `Andreas Müller`_. +- |Enhancement| :func:`model_selection.cross_val_score`, + :func:`model_selection.cross_validate`, + :class:`model_selection.GridSearchCV`, and + :class:`model_selection.RandomizedSearchCV` allows estimator to fail scoring + and replace the score with `error_score`. If `error_score="raise"`, the error + will be raised. + :pr:`18343` by `Guillaume Lemaitre`_ and :user:`Devi Sandeep `. + +- |Enhancement| :func:`model_selection.learning_curve` now accept fit_params + to pass additional estimator parameters. + :pr:`18595` by :user:`Amanda Dsouza `. - |Fix| Fixed the `len` of :class:`model_selection.ParameterSampler` when all distributions are lists and `n_iter` is more than the number of unique @@ -442,28 +676,29 @@ Changelog :pr:`18266` by :user:`Subrat Sahu `, :user:`Nirvan ` and :user:`Arthur Book `. +- |Enhancement| :class:`model_selection.GridSearchCV`, + :class:`model_selection.RandomizedSearchCV` and + :func:`model_selection.cross_validate` support `scoring` being a callable + returning a dictionary of of multiple metric names/values association. + :pr:`15126` by `Thomas Fan`_. + :mod:`sklearn.multiclass` ......................... -- |Fix| A fix to allow :class:`multiclass.OutputCodeClassifier` to accept - sparse input data in its `fit` and `predict` methods. The check for - validity of the input is now delegated to the base estimator. - :pr:`17233` by :user:`Zolisa Bleki `. - - |Enhancement| :class:`multiclass.OneVsOneClassifier` now accepts the inputs with missing values. Hence, estimators which can handle missing values (may be a pipeline with imputation step) can be used as a estimator for multiclass wrappers. :pr:`17987` by :user:`Venkatachalam N `. +- |Fix| A fix to allow :class:`multiclass.OutputCodeClassifier` to accept + sparse input data in its `fit` and `predict` methods. The check for + validity of the input is now delegated to the base estimator. + :pr:`17233` by :user:`Zolisa Bleki `. + :mod:`sklearn.multioutput` .......................... -- |Fix| A fix to accept tuples for the ``order`` parameter - in :class:`multioutput.ClassifierChain`. - :pr:`18124` by :user:`Gus Brocchini ` and - :user:`Amanda Dsouza `. - - |Enhancement| :class:`multioutput.MultiOutputClassifier` and :class:`multioutput.MultiOutputRegressor` now accepts the inputs with missing values. Hence, estimators which can handle missing @@ -471,21 +706,26 @@ Changelog estimators) can be used as a estimator for multiclass wrappers. :pr:`17987` by :user:`Venkatachalam N `. +- |Fix| A fix to accept tuples for the ``order`` parameter + in :class:`multioutput.ClassifierChain`. + :pr:`18124` by :user:`Gus Brocchini ` and + :user:`Amanda Dsouza `. + :mod:`sklearn.naive_bayes` .......................... -- |API|: The attributes ``coef_`` and ``intercept_`` are now deprecated in - :class:`naive_bayes.MultinomialNB`, :class:`naive_bayes.ComplementNB`, - :class:`naive_bayes.BernoulliNB` and :class:`naive_bayes.CategoricalNB`, - and will be removed in v0.26. :pr:`17427` by - :user:`Juan Carlos Alfaro Jiménez `. 
- - |Enhancement| Adds a parameter `min_categories` to :class:`naive_bayes.CategoricalNB` that allows a minimum number of categories per feature to be specified. This allows categories unseen during training to be accounted for. :pr:`16326` by :user:`George Armstrong `. +- |API| The attributes ``coef_`` and ``intercept_`` are now deprecated in + :class:`naive_bayes.MultinomialNB`, :class:`naive_bayes.ComplementNB`, + :class:`naive_bayes.BernoulliNB` and :class:`naive_bayes.CategoricalNB`, + and will be removed in v1.1 (renaming of 0.26). + :pr:`17427` by :user:`Juan Carlos Alfaro Jiménez `. + :mod:`sklearn.neighbors` ........................ @@ -506,6 +746,23 @@ Changelog the data intrinsic dimensionality is too high for tree-based methods. :pr:`17148` by :user:`Geoffrey Bolmier `. +- |Fix| :class:`neighbors.BinaryTree` + will raise a `ValueError` when fitting on data array having points with + different dimensions. + :pr:`18691` by :user:`Chiara Marmo `. + +- |Fix| :class:`neighbors.NearestCentroid` with a numerical `shrink_threshold` + will raise a `ValueError` when fitting on data with all constant features. + :pr:`18370` by :user:`Trevor Waite `. + +- |Fix| In methods `radius_neighbors` and + `radius_neighbors_graph` of :class:`neighbors.NearestNeighbors`, + :class:`neighbors.RadiusNeighborsClassifier`, + :class:`neighbors.RadiusNeighborsRegressor`, and + :class:`neighbors.RadiusNeighborsTransformer`, using `sort_results=True` now + correctly sorts the results even when fitting with the "brute" algorithm. + :pr:`18612` by `Tom Dupre la Tour`_. + :mod:`sklearn.neural_network` ............................. @@ -543,6 +800,9 @@ Changelog :mod:`sklearn.preprocessing` ............................ +- |Feature| :class:`preprocessing.OneHotEncoder` now supports missing + values by treating them as a category. :pr:`17317` by `Thomas Fan`_. + - |Feature| Add a new ``handle_unknown`` parameter with a ``use_encoded_value`` option, along with a new ``unknown_value`` parameter, to :class:`preprocessing.OrdinalEncoder` to allow unknown categories during @@ -554,6 +814,13 @@ Changelog which clips the transformed values of test data to ``feature_range``. :pr:`17833` by :user:`Yashika Sharma `. +- |Feature| Add ``sample_weight`` parameter to + :class:`preprocessing.StandardScaler`. Allows setting + individual weights for each sample. :pr:`18510` and + :pr:`18447` and :pr:`16066` and :pr:`18682` by + :user:`Maria Telenczuk ` and :user:`Albert Villanova ` + and :user:`panpiort8` and :user:`Alex Gramfort `. + - |Enhancement| Verbose output of :class:`model_selection.GridSearchCV` has been improved for readability. :pr:`16935` by :user:`Raghav Rajagopalan ` and :user:`Chiara Marmo `. @@ -567,10 +834,28 @@ Changelog :class:`preprocessing.KBinsDiscretizer`. :pr:`16335` by :user:`Arthur Imbert `. +- |Fix| Raise error on + :meth:`sklearn.preprocessing.OneHotEncoder.inverse_transform` + when `handle_unknown='error'` and `drop=None` for samples + encoded as all zeros. :pr:`14982` by + :user:`Kevin Winata `. + +:mod:`sklearn.semi_supervised` +.............................. + +- |MajorFeature| Added :class:`semi_supervised.SelfTrainingClassifier`, a + meta-classifier that allows any supervised classifier to function as a + semi-supervised classifier that can learn from unlabeled data. :issue:`11682` + by :user:`Oliver Rausch ` and :user:`Patrice Becker `. + +- |Fix| Fix incorrect encoding when using unicode string dtypes in + :class:`preprocessing.OneHotEncoder` and + :class:`preprocessing.OrdinalEncoder`. 
:pr:`15763` by `Thomas Fan`_. + :mod:`sklearn.svm` .................. -- |Enhancement| invoke scipy blas api for svm kernel function in ``fit``, +- |Enhancement| invoke SciPy BLAS API for SVM kernel function in ``fit``, ``predict`` and related methods of :class:`svm.SVC`, :class:`svm.NuSVC`, :class:`svm.SVR`, :class:`svm.NuSVR`, :class:`OneClassSVM`. :pr:`16530` by :user:`Shuhua Fan `. @@ -578,10 +863,14 @@ Changelog :mod:`sklearn.tree` ................... +- |Feature| :class:`tree.DecisionTreeRegressor` now supports the new splitting + criterion ``'poisson'`` useful for modeling count data. :pr:`17386` by + :user:`Christian Lorentzen `. + - |Enhancement| :func:`tree.plot_tree` now uses colors from the matplotlib configuration settings. :pr:`17187` by `Andreas Müller`_. -- |API|: The parameter ``X_idx_sorted`` is now deprecated in +- |API| The parameter ``X_idx_sorted`` is now deprecated in :meth:`tree.DecisionTreeClassifier.fit` and :meth:`tree.DecisionTreeRegressor.fit`, and has not effect. :pr:`17614` by :user:`Juan Carlos Alfaro Jiménez `. @@ -589,6 +878,15 @@ Changelog :mod:`sklearn.utils` .................... +- |Enhancement| Add ``check_methods_sample_order_invariance`` to + :func:`~utils.estimator_checks.check_estimator`, which checks that + estimator methods are invariant if applied to the same dataset + with different sample order :pr:`17598` by :user:`Jason Ngo `. + +- |Enhancement| Add support for weights in + :func:`utils.sparse_func.incr_mean_variance_axis`. + By :user:`Maria Telenczuk ` and :user:`Alex Gramfort `. + - |Fix| Raise ValueError with clear error message in :func:`check_array` for sparse DataFrames with mixed types. :pr:`17992` by :user:`Thomas J. Fan ` and @@ -598,8 +896,73 @@ Changelog with different endianness. :pr:`17644` by :user:`Qi Zhang `. +- |Fix| Check that we raise proper error when axis=1 and the + dimensions do not match in :func:`utils.sparse_func.incr_mean_variance_axis`. + By :user:`Alex Gramfort `. + +Miscellaneous +............. + +- |Enhancement| Calls to ``repr`` are now faster + when `print_changed_only=True`, especially with meta-estimators. + :pr:`18508` by :user:`Nathan C. `. 
+ Code and Documentation Contributors ----------------------------------- Thanks to everyone who has contributed to the maintenance and improvement of -the project since version 0.20, including: +the project since version 0.23, including: + +Abo7atm, Adam Spannbauer, Adrin Jalali, adrinjalali, Agamemnon Krasoulis, +Akshay Deodhar, Albert Villanova del Moral, Alessandro Gentile, Alex Henrie, +Alex Itkes, Alex Liang, Alexander Lenail, alexandracraciun, Alexandre Gramfort, +alexshacked, Allan D Butler, Amanda Dsouza, amy12xx, Anand Tiwari, Anderson +Nelson, Andreas Mueller, Ankit Choraria, Archana Subramaniyan, Arthur Imbert, +Ashutosh Hathidara, Ashutosh Kushwaha, Atsushi Nukariya, Aura Munoz, AutoViz +and Auto_ViML, Avi Gupta, Avinash Anakal, Ayako YAGI, barankarakus, +barberogaston, beatrizsmg, Ben Mainye, Benjamin Bossan, Benjamin Pedigo, Bharat +Raghunathan, Bhavika Devnani, Biprateep Dey, bmaisonn, Bo Chang, Boris +Villazón-Terrazas, brigi, Brigitta Sipőcz, Bruno Charron, Byron Smith, Cary +Goltermann, Cat Chenal, CeeThinwa, chaitanyamogal, Charles Patel, Chiara Marmo, +Christian Kastner, Christian Lorentzen, Christoph Deil, Christos Aridas, Clara +Matos, clmbst, Coelhudo, crispinlogan, Cristina Mulas, Daniel López, Daniel +Mohns, darioka, Darshan N, david-cortes, Declan O'Neill, Deeksha Madan, +Elizabeth DuPre, Eric Fiegel, Eric Larson, Erich Schubert, Erin Khoo, Erin R +Hoffman, eschibli, Felix Wick, fhaselbeck, Forrest Koch, Francesco Casalegno, +Frans Larsson, Gael Varoquaux, Gaurav Desai, Gaurav Sheni, genvalen, Geoffrey +Bolmier, George Armstrong, George Kiragu, Gesa Stupperich, Ghislain Antony +Vaillant, Gim Seng, Gordon Walsh, Gregory R. Lee, Guillaume Chevalier, +Guillaume Lemaitre, Haesun Park, Hannah Bohle, Hao Chun Chang, Harry Scholes, +Harsh Soni, Henry, Hirofumi Suzuki, Hitesh Somani, Hoda1394, Hugo Le Moine, +hugorichard, indecisiveuser, Isuru Fernando, Ivan Wiryadi, j0rd1smit, Jaehyun +Ahn, Jake Tae, James Hoctor, Jan Vesely, Jeevan Anand Anne, JeroenPeterBos, +JHayes, Jiaxiang, Jie Zheng, Jigna Panchal, jim0421, Jin Li, Joaquin +Vanschoren, Joel Nothman, Jona Sassenhagen, Jonathan, Jorge Gorbe Moya, Joseph +Lucas, Joshua Newton, Juan Carlos Alfaro Jiménez, Julien Jerphanion, Justin +Huber, Jérémie du Boisberranger, Kartik Chugh, Katarina Slama, kaylani2, +Kendrick Cetina, Kenny Huynh, Kevin Markham, Kevin Winata, Kiril Isakov, +kishimoto, Koki Nishihara, Krum Arnaudov, Kyle Kosic, Lauren Oldja, Laurenz +Reitsam, Lisa Schwetlick, Louis Douge, Louis Guitton, Lucy Liu, Madhura +Jayaratne, maikia, Manimaran, Manuel López-Ibáñez, Maren Westermann, Maria +Telenczuk, Mariam-ke, Marijn van Vliet, Markus Löning, Martin Scheubrein, +Martina G. Vilas, Martina Megasari, Mateusz Górski, mathschy, mathurinm, +Matthias Bussonnier, Max Del Giudice, Michael, Milan Straka, Muoki Caleb, N. +Haiat, Nadia Tahiri, Ph. D, Naoki Hamada, Neil Botelho, Nicolas Hug, Nils +Werner, noelano, Norbert Preining, oj_lappi, Oleh Kozynets, Olivier Grisel, +Pankaj Jindal, Pardeep Singh, Parthiv Chigurupati, Patrice Becker, Pete Green, +pgithubs, Poorna Kumar, Prabakaran Kumaresshan, Probinette4, pspachtholz, +pwalchessen, Qi Zhang, rachel fischoff, Rachit Toshniwal, Rafey Iqbal Rahman, +Rahul Jakhar, Ram Rachum, RamyaNP, rauwuckl, Ravi Kiran Boggavarapu, Ray Bell, +Reshama Shaikh, Richard Decal, Rishi Advani, Rithvik Rao, Rob Romijnders, roei, +Romain Tavenard, Roman Yurchak, Ruby Werman, Ryotaro Tsukada, sadak, Saket +Khandelwal, Sam, Sam Ezebunandu, Sam Kimbinyi, Sarah Brown, Saurabh Jain, Sean +O. 
Stalley, Sergio, Shail Shah, Shane Keller, Shao Yang Hong, Shashank Singh, +Shooter23, Shubhanshu Mishra, simonamaggio, Soledad Galli, Srimukh Sripada, +Stephan Steinfurt, subrat93, Sunitha Selvan, Swier, Sylvain Marié, SylvainLan, +t-kusanagi2, Teon L Brooks, Terence Honles, Thijs van den Berg, Thomas J Fan, +Thomas J. Fan, Thomas S Benjamin, Thomas9292, Thorben Jensen, tijanajovanovic, +Timo Kaufmann, tnwei, Tom Dupré la Tour, Trevor Waite, ufmayer, Umberto Lupo, +Venkatachalam N, Vikas Pandey, Vinicius Rios Fuck, Violeta, watchtheblur, Wenbo +Zhao, willpeppo, xavier dupré, Xethan, Xue Qianming, xun-tang, yagi-3, Yakov +Pchelintsev, Yashika Sharma, Yi-Yan Ge, Yue Wu, Yutaro Ikeda, Zaccharie Ramzi, +zoj613, Zhao Feng. diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst new file mode 100644 index 0000000000000..67cfc77551351 --- /dev/null +++ b/doc/whats_new/v1.0.rst @@ -0,0 +1,213 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_1_0: + +Version 1.0.0 +============= + +**In Development** + + +.. include:: changelog_legend.inc + +Put the changes in their relevant module. + +Changed models +-------------- + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. + +- |Fix| :class:`manifold.TSNE` now avoids numerical underflow issues during + affinity matrix computation. + +Details are listed in the changelog below. + +(While we are trying to better inform users by providing this information, we +cannot assure that this list is complete.) + + +Changelog +--------- + +.. + Entries should be grouped by module (in alphabetic order) and prefixed with + one of the labels: |MajorFeature|, |Feature|, |Efficiency|, |Enhancement|, + |Fix| or |API| (see whats_new.rst for descriptions). + Entries should be ordered by those labels (e.g. |Fix| after |Efficiency|). + Changes not specific to a module should be listed under *Multiple Modules* + or *Miscellaneous*. + Entries should end with: + :pr:`123456` by :user:`Joe Bloggs `. + where 123456 is the *pull request* number, not the issue number. + +:mod:`sklearn.cluster` +...................... + +- |Efficiency| The "k-means++" initialization of :class:`cluster.KMeans` and + :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore + settings. :pr:`19002` by :user:`Jon Crall ` and + :user:`Jérémie du Boisberranger `. + +- |Efficiency| :class:`cluster.KMeans` with `algorithm='elkan'` is now faster + in multicore settings. :pr:`19052` by + :user:`Yusuke Nagasaka `. + +- |API| :class:`cluster.Birch` attributes, `fit_` and `partial_fit_`, are + deprecated and will be removed in 1.2. :pr:`19297` by `Thomas Fan`_. + +:mod:`sklearn.datasets` +....................... + +- |Enhancement| :func:`datasets.fetch_openml` now supports categories with + missing values when returning a pandas dataframe. :pr:`19365` by + `Thomas Fan`_ and :user:`Amanda Dsouza ` and + :user:`EL-ATEIF Sara `. + +:mod:`sklearn.decomposition` +............................ + +- |API| In :class:`decomposition.DictionaryLearning`, + :class:`decomposition.MiniBatchDictionaryLearning`, + :func:`dict_learning` and :func:`dict_learning_online`, + `transform_alpha` will be equal to `alpha` instead of 1.0 by default + starting from version 1.2 + :pr:`19159` by :user:`Benoît Malézieux `. 
+ +- |Fix| Fixes incorrect multiple data-conversion warnings when clustering + boolean data. :pr:`19046` by :user:`Surya Prakash `. + +- |Fix| Fixed :func:`dict_learning`, used by :class:`DictionaryLearning`, to + ensure determinism of the output. Achieved by flipping signs of the SVD + output which is used to initialize the code. + :pr:`18433` by :user:`Bruno Charron `. + +:mod:`sklearn.ensemble` +....................... + +- |Fix| Do not allow to compute out-of-bag (OOB) score in + :class:`ensemble.RandomForestClassifier` and + :class:`ensemble.ExtraTreesClassifier` with multiclass-multioutput target + since scikit-learn does not provide any metric supporting this type of + target. Additional private refactoring was performed. + :pr:`19162` by :user:`Guillaume Lemaitre `. + +:mod:`sklearn.feature_extraction` +................................. + +- |Fix| Fixed a bug in class:`feature_extraction.HashingVectorizer` where some + input strings would result in negative indices in the transformed data. + :pr:`19035` by :user:`Liu Yu `. + +:mod:`sklearn.inspection` +......................... + +- |Fix| Allow multiple scorers input to + :func:`~sklearn.inspection.permutation_importance`. + :pr:`19411` by :user:`Simona Maggio `. + +:mod:`sklearn.linear_model` +........................... + +- |Enhancement| Validate user-supplied gram matrix passed to linear models + via the `precompute` argument. :pr:`19004` by :user:`Adam Midvidy `. + +- |Fix| :meth:`ElasticNet.fit` no longer modifies `sample_weight` in place. + :pr:`19055` by `Thomas Fan`_. + +- |Fix| :class:`Lasso`, :class:`ElasticNet` no longer have a `dual_gap_` + not corresponding to their objective. :pr:`19172` by + :user:`Mathurin Massias ` + +:mod:`sklearn.preprocessing` +............................ + +- |Feature| :class:`preprocessing.OrdinalEncoder` supports passing through + missing values by default. :pr:`19069` by `Thomas Fan`_. + +- |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` + is deprecated and will be removed in 1.2. + Motivation for this deprecation: ``normalize`` parameter did not take any + effect if ``fit_intercept`` was set to False and therefore was deemed + confusing. + The behavior of the deprecated LinearRegression(normalize=True) can be + reproduced with :class:`~sklearn.pipeline.Pipeline` with + :class:`~sklearn.preprocessing.StandardScaler`as follows: + make_pipeline(StandardScaler(with_mean=False), LinearRegression()). + :pr:`17743` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. + +- |Fix|: `sample_weight` are now fully taken into account in linear models + when `normalize=True` for both feature centering and feature + scaling. + :pr:`19426` by :user:`Alexandre Gramfort ` and + :user:`Maria Telenczuk `. + +:mod:`sklearn.manifold` +....................... + +- |Fix| Change numerical precision to prevent underflow issues + during affinity matrix computation for :class:`manifold.TSNE`. + :pr:`19472` by :user:`Dmitry Kobak `. + +:mod:`sklearn.metrics` +...................... + +- |API| :class:`metrics.ConfusionMatrixDisplay` exposes two class methods + :func:`~metrics.ConfusionMatrixDisplay.from_estimator` and + :func:`~metrics.ConfusionMatrixDisplay.from_predictions` allowing to create + a confusion matrix plot using an estimator or the predictions. + :func:`metrics.plot_confusion_matrix` is deprecated in favor of these two + class methods and will be removed in 1.2. + :pr:`18543` by `Guillaume Lemaitre`_. 
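A minimal usage sketch of the two class methods mentioned above (requires matplotlib; the dataset and estimator are placeholders)::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import ConfusionMatrixDisplay
    from sklearn.model_selection import train_test_split

    X, y = make_classification(random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    # Build the display directly from a fitted estimator...
    ConfusionMatrixDisplay.from_estimator(clf, X_test, y_test)
    # ...or from precomputed predictions, replacing plot_confusion_matrix.
    ConfusionMatrixDisplay.from_predictions(y_test, clf.predict(X_test))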
+ +- |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for + quantile regression. :pr:`19415` by :user:`Xavier Dupré ` + and :user:`Oliver Grisel `. + +:mod:`sklearn.naive_bayes` +.......................... + +- |Fix| The `fit` and `partial_fit` methods of the discrete naive Bayes + classifiers (:class:`naive_bayes.BernoulliNB`, + :class:`naive_bayes.CategoricalNB`, :class:`naive_bayes.ComplementNB`, + and :class:`naive_bayes.MultinomialNB`) now correctly handle the degenerate + case of a single class in the training set. + :pr:`18925` by :user:`David Poznik `. + +- |API| The attribute ``sigma_`` is now deprecated in + :class:`naive_bayes.GaussianNB` and will be removed in 1.2. + Use ``var_`` instead. + :pr:`18842` by :user:`Hong Shao Yang `. + +:mod:`sklearn.preprocessing` +............................ + +- |Feature| The new :class:`preprocessing.SplineTransformer` is a feature + preprocessing tool for the generation of B-splines, parametrized by the + polynomial ``degree`` of the splines, number of knots ``n_knots`` and knot + positioning strategy ``knots``. + :pr:`18368` by :user:`Christian Lorentzen `. + +:mod:`sklearn.tree` +................... + +- |Enhancement| Add `fontname` argument in :func:`tree.export_graphviz` + for non-English characters. :pr:`18959` by :user:`Zero ` + and :user:`wstates `. + +- |Enhancement| Add `metrics.specificity_score` function. + :pr:`19556` by :user:`Hao Chun Chang ` + and :user:`Pawel Kranzberg `. + +Code and Documentation Contributors +----------------------------------- + +Thanks to everyone who has contributed to the maintenance and improvement of +the project since version 0.24, including: + +TODO: update at the time of the release. diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 74edcc069954e..d2a0eab4af18e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2081,12 +2081,12 @@ def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, >>> specificity_score(y_true, y_pred, average='weighted') 0.66... >>> specificity_score(y_true, y_pred, average=None) - array([0.75, 0.5 , 0.75]) + array([0.75, 0.5, 0.75]) >>> y_true = [0, 0, 0, 0, 0, 0] >>> specificity_score(y_true, y_pred, average=None) - array([0. , 0.66666667, 0.83333333]) + array([0. , 0.66666667, 0.83333333]) >>> specificity_score(y_true, y_pred, average=None, zero_division=1) - array([1. , 0.66666667, 0.83333333]) + array([1. , 0.66666667, 0.83333333]) """ _, _, tnr, _ = tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=labels, From 769b3af6e9c4f82e8be54eb2bc274b669444f99d Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 12:24:50 +0100 Subject: [PATCH 022/127] Fix linting --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index a0d9b4d1445d4..13b3b6fed91c9 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2089,9 +2089,9 @@ def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, array([0.75, 0.5, 0.75]) >>> y_true = [0, 0, 0, 0, 0, 0] >>> specificity_score(y_true, y_pred, average=None) - array([0. , 0.66666667, 0.83333333]) + array([0., 0.66666667, 0.83333333]) >>> specificity_score(y_true, y_pred, average=None, zero_division=1) - array([1. 
, 0.66666667, 0.83333333]) + array([1., 0.66666667, 0.83333333]) """ _, _, tnr, _ = tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=labels, From edd6ff0ad7be196217c9be15b95b801c1a9e6a70 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 12:30:57 +0100 Subject: [PATCH 023/127] Fix linting --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 13b3b6fed91c9..0f02854038faa 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2089,9 +2089,9 @@ def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, array([0.75, 0.5, 0.75]) >>> y_true = [0, 0, 0, 0, 0, 0] >>> specificity_score(y_true, y_pred, average=None) - array([0., 0.66666667, 0.83333333]) + array([0., 0.66..., 0.83...]) >>> specificity_score(y_true, y_pred, average=None, zero_division=1) - array([1., 0.66666667, 0.83333333]) + array([1., 0.66..., 0.83...]) """ _, _, tnr, _ = tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=labels, From b8100d78757282168e08cfa331be5af103626378 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 20:11:20 +0100 Subject: [PATCH 024/127] Added scorer, fixed tests --- sklearn/metrics/_classification.py | 11 +++++------ sklearn/metrics/_scorer.py | 11 +++++++---- sklearn/metrics/tests/test_classification.py | 6 +++--- sklearn/tests/test_multiclass.py | 8 +++++++- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 0f02854038faa..cea6b35311c0b 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1543,7 +1543,6 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, return precision, recall, f_score, true_sum -@_deprecate_positional_args def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, average=None, warn_for=('tpr', 'fpr', 'tnr', 'fnr'), @@ -1984,7 +1983,6 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', return r -@_deprecate_positional_args def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', sample_weight=None, zero_division="warn"): """Compute specificity, also known as true negative rate. @@ -2061,7 +2059,8 @@ def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, See Also -------- classification_report, precision_recall_fscore_support, recall_score, - balanced_accuracy_score, multilabel_confusion_matrix, tpr_fpr_tnr_fnr_scores + balanced_accuracy_score, multilabel_confusion_matrix, + tpr_fpr_tnr_fnr_scores Notes ----- @@ -2086,12 +2085,12 @@ def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, >>> specificity_score(y_true, y_pred, average='weighted') 0.66... >>> specificity_score(y_true, y_pred, average=None) - array([0.75, 0.5, 0.75]) + array([0.75, 0.5 , 0.75]) >>> y_true = [0, 0, 0, 0, 0, 0] >>> specificity_score(y_true, y_pred, average=None) - array([0., 0.66..., 0.83...]) + array([0. , 0.66..., 0.83...]) >>> specificity_score(y_true, y_pred, average=None, zero_division=1) - array([1., 0.66..., 0.83...]) + array([1. 
, 0.66..., 0.83...]) """ _, _, tnr, _ = tpr_fpr_tnr_fnr_scores(y_true, y_pred, labels=labels, diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index c686d3b7c0b34..f2a14b5ec573e 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -28,9 +28,10 @@ mean_squared_error, mean_squared_log_error, mean_poisson_deviance, mean_gamma_deviance, accuracy_score, top_k_accuracy_score, f1_score, roc_auc_score, - average_precision_score, precision_score, recall_score, + average_precision_score, precision_score, recall_score, log_loss, balanced_accuracy_score, explained_variance_score, - brier_score_loss, jaccard_score, mean_absolute_percentage_error) + brier_score_loss, jaccard_score, mean_absolute_percentage_error, + specificity_score) from .cluster import adjusted_rand_score from .cluster import rand_score @@ -734,8 +735,10 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False, for name, metric in [('precision', precision_score), - ('recall', recall_score), ('f1', f1_score), - ('jaccard', jaccard_score)]: + ('recall', recall_score), + ('f1', f1_score), + ('jaccard', jaccard_score), + ('specificity', specificity_score)]: SCORERS[name] = make_scorer(metric, average='binary') for average in ['macro', 'micro', 'samples', 'weighted']: qualified_name = '{0}_{1}'.format(name, average) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0d765a1e4b66d..482954e182148 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1049,7 +1049,7 @@ def test_zero_precision_recall(): assert_almost_equal(f1_score(y_true, y_pred, average='macro'), 0.0, 2) assert_almost_equal(specificity_score(y_true, y_pred, average='macro'), - 0.0, 2) + 0.5, 2) finally: np.seterr(**old_error_settings) @@ -2118,7 +2118,7 @@ def test_specificity_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Tnr is ill-defined and ' - 'being set to 0.0 due to no true samples.' + 'being set to 0.0 due to no negative samples.' ' Use `zero_division` parameter to control' ' this behavior.') else: @@ -2128,7 +2128,7 @@ def test_specificity_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Tnr is ill-defined and ' - 'being set to 0.0 due to no true samples.' + 'being set to 0.0 due to no negative samples.' 
' Use `zero_division` parameter to control' ' this behavior.') diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 96bd1b807a95f..96933a7c86874 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -24,6 +24,7 @@ from sklearn.metrics import precision_score from sklearn.metrics import recall_score +from sklearn.metrics import specificity_score from sklearn.svm import LinearSVC, SVC from sklearn.naive_bayes import MultinomialNB @@ -309,7 +310,9 @@ def test_ovr_fit_predict_svc(): def test_ovr_multilabel_dataset(): base_clf = MultinomialNB(alpha=1) - for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)): + for au, prec, recall, specificity in zip( + (True, False), (0.51, 0.66), (0.51, 0.80) + ): X, Y = datasets.make_multilabel_classification(n_samples=100, n_features=20, n_classes=5, @@ -329,6 +332,9 @@ def test_ovr_multilabel_dataset(): assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"), recall, decimal=2) + assert_almost_equal(specificity_score(Y_test, Y_pred, average="micro"), + specificity, + decimal=2) def test_ovr_multilabel_predict_proba(): From aeed5e597acc2776be0cc0f011eda40d34148e1f Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 20:16:00 +0100 Subject: [PATCH 025/127] Linting --- sklearn/metrics/_classification.py | 3 ++- sklearn/metrics/_scorer.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index cea6b35311c0b..197b8506fd7e5 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1984,7 +1984,8 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', def specificity_score(y_true, y_pred, *, labels=None, pos_label=1, - average='binary', sample_weight=None, zero_division="warn"): + average='binary', sample_weight=None, + zero_division="warn"): """Compute specificity, also known as true negative rate. 
The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index f2a14b5ec573e..6808e6c695f3d 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -28,7 +28,7 @@ mean_squared_error, mean_squared_log_error, mean_poisson_deviance, mean_gamma_deviance, accuracy_score, top_k_accuracy_score, f1_score, roc_auc_score, - average_precision_score, precision_score, recall_score, + average_precision_score, precision_score, recall_score, log_loss, balanced_accuracy_score, explained_variance_score, brier_score_loss, jaccard_score, mean_absolute_percentage_error, specificity_score) From dc5423f71f2f36391ea5d94c97fc06a8ade5f191 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 21:04:44 +0100 Subject: [PATCH 026/127] Further tests --- sklearn/metrics/tests/test_common.py | 29 ++++++++++++++++++++- sklearn/metrics/tests/test_score_objects.py | 13 ++++++--- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 9f2f997d46712..7b33603a08121 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -57,6 +57,7 @@ from sklearn.metrics import recall_score from sklearn.metrics import roc_auc_score from sklearn.metrics import roc_curve +from sklearn.metrics import specificity_score from sklearn.metrics import tpr_fpr_tnr_fnr_scores from sklearn.metrics import zero_one_loss from sklearn.metrics import ndcg_score @@ -151,6 +152,8 @@ "tpr_fpr_tnr_fnr_scores": tpr_fpr_tnr_fnr_scores, "binary_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="binary"), + "specificity_score": specificity_score, + "binary_specificity_score": partial(specificity_score, average="binary"), "weighted_f0.5_score": partial(fbeta_score, average="weighted", beta=0.5), "weighted_f1_score": partial(f1_score, average="weighted"), @@ -160,6 +163,8 @@ "weighted_jaccard_score": partial(jaccard_score, average="weighted"), "weighted_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="weighted"), + "weighted_specificity_score": + partial(specificity_score, average="weighted"), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), @@ -169,6 +174,7 @@ "micro_jaccard_score": partial(jaccard_score, average="micro"), "micro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="micro"), + "micro_specificity_score": partial(specificity_score, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), @@ -178,6 +184,7 @@ "macro_jaccard_score": partial(jaccard_score, average="macro"), "macro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="macro"), + "macro_specificity_score": partial(specificity_score, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), @@ -187,6 +194,7 @@ "samples_jaccard_score": partial(jaccard_score, average="samples"), "samples_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="samples"), + "samples_specificity_score": partial(specificity_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, } @@ -287,6 +295,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_recall_score", "samples_jaccard_score", "samples_tpr_fpr_tnr_fnr_scores", + 
"samples_specificity_score", "coverage_error", "unnormalized_multilabel_confusion_matrix_sample", "label_ranking_loss", @@ -306,6 +315,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_roc_auc", "tpr_fpr_tnr_fnr_scores", + "specificity_score", "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -320,6 +330,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f2_score", "f0.5_score", "binary_tpr_fpr_tnr_fnr_scores", + "binary_specificity_score", # curves "roc_curve", @@ -334,7 +345,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): # Metrics with an "average" argument METRICS_WITH_AVERAGING = { "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score", - "jaccard_score" + "jaccard_score", "specificity_score" } # Threshold-based metrics with an "average" argument @@ -354,6 +365,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "jaccard_score", "tpr_fpr_tnr_fnr_scores", + "specificity_score", "average_precision_score", "weighted_average_precision_score", "micro_average_precision_score", @@ -362,12 +374,15 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): # pos_label support deprecated; to be removed in 0.18: "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", + "weighted_specificity_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", + "micro_specificity_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", + "macro_specificity_score", } # Metrics with a "labels" argument @@ -384,20 +399,24 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "jaccard_score", "tpr_fpr_tnr_fnr_scores", + "specificity_score", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", "weighted_tpr_fpr_tnr_fnr_scores", + "weighted_specificity_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_jaccard_score", "micro_tpr_fpr_tnr_fnr_scores", + "micro_specificity_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "macro_jaccard_score", "macro_tpr_fpr_tnr_fnr_scores", + "macro_specificity_score", "unnormalized_multilabel_confusion_matrix", "unnormalized_multilabel_confusion_matrix_sample", @@ -441,16 +460,19 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", "weighted_tpr_fpr_tnr_fnr_scores", + "weighted_specificity_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "macro_jaccard_score", "macro_tpr_fpr_tnr_fnr_scores", + "macro_specificity_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_jaccard_score", "micro_tpr_fpr_tnr_fnr_scores", + "micro_specificity_score", "unnormalized_multilabel_confusion_matrix", @@ -458,6 +480,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_precision_score", "samples_recall_score", "samples_jaccard_score", "samples_tpr_fpr_tnr_fnr_scores", + "samples_specificity_score", } # Regression metrics with "multioutput-continuous" format support @@ -484,6 +507,7 @@ def 
precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_f0.5_score", "micro_f1_score", "micro_f2_score", "micro_precision_score", "micro_recall_score", "micro_tpr_fpr_tnr_fnr_scores", + "micro_specificity_score", "matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error", "median_absolute_error", "max_error", @@ -512,6 +536,9 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "tpr_fpr_tnr_fnr_scores", "weighted_tpr_fpr_tnr_fnr_scores", "macro_tpr_fpr_tnr_fnr_scores", + "specificity_score", + "weighted_specificity_score", + "macro_specificity_score", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", "weighted_precision_score", "weighted_jaccard_score", diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index be214944e6ee4..0bb38e93ac4a8 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -30,7 +30,8 @@ r2_score, recall_score, roc_auc_score, - top_k_accuracy_score + top_k_accuracy_score, + specificity_score ) from sklearn.metrics import cluster as cluster_module from sklearn.metrics import check_scoring @@ -73,7 +74,8 @@ 'neg_log_loss', 'neg_brier_score', 'jaccard', 'jaccard_weighted', 'jaccard_macro', 'jaccard_micro', 'roc_auc_ovr', 'roc_auc_ovo', - 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'] + 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', + 'specificity'] # All supervised cluster scorers (They behave like classification metric) CLUSTER_SCORERS = ["adjusted_rand_score", @@ -87,7 +89,7 @@ "fowlkes_mallows_score"] MULTILABEL_ONLY_SCORERS = ['precision_samples', 'recall_samples', 'f1_samples', - 'jaccard_samples'] + 'jaccard_samples', 'specificity_samples'] REQUIRE_POSITIVE_Y_SCORERS = ['neg_mean_poisson_deviance', 'neg_mean_gamma_deviance'] @@ -326,6 +328,7 @@ def test_make_scorer(): ('jaccard_macro', partial(jaccard_score, average='macro')), ('jaccard_micro', partial(jaccard_score, average='micro')), ('top_k_accuracy', top_k_accuracy_score), + ('specificity', specificity_score) ]) def test_classification_binary_scores(scorer_name, metric): # check consistency between score and scorer for scores supporting @@ -355,6 +358,7 @@ def test_classification_binary_scores(scorer_name, metric): ('jaccard_weighted', partial(jaccard_score, average='weighted')), ('jaccard_macro', partial(jaccard_score, average='macro')), ('jaccard_micro', partial(jaccard_score, average='micro')), + ('specificity', specificity_score) ]) def test_classification_multiclass_scores(scorer_name, metric): # check consistency between score and scorer for scores supporting @@ -950,7 +954,8 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): @pytest.mark.parametrize( - "score_func", [f1_score, precision_score, recall_score, jaccard_score] + "score_func", [f1_score, precision_score, recall_score, jaccard_score, + specificity_score] ) def test_non_symmetric_metric_pos_label( score_func, string_labeled_classification_problem From 9c1a0a48f3c47be929e3fcd78b4fab240cd1aa63 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 21:49:18 +0100 Subject: [PATCH 027/127] Test bug fixes --- sklearn/metrics/tests/test_classification.py | 6 +++--- sklearn/metrics/tests/test_score_objects.py | 3 ++- sklearn/tests/test_multiclass.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 482954e182148..2909ec89d63f0 100644 --- 
a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2107,13 +2107,13 @@ def test_fscore_warnings(zero_division): @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) def test_specificity_warnings(zero_division): assert_no_warnings(specificity_score, - np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), average='micro', zero_division=zero_division) with warnings.catch_warnings(record=True) as record: warnings.simplefilter('always') - specificity_score(np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), + specificity_score(np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), average='micro', zero_division=zero_division) if zero_division == "warn": assert (str(record.pop().message) == diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 0bb38e93ac4a8..263800847ce6c 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -75,7 +75,8 @@ 'jaccard', 'jaccard_weighted', 'jaccard_macro', 'jaccard_micro', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', - 'specificity'] + 'specificity', 'specificity_weighted', 'specificity_macro', + 'specificity_micro'] # All supervised cluster scorers (They behave like classification metric) CLUSTER_SCORERS = ["adjusted_rand_score", diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 96933a7c86874..a2f8ab05f542f 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -311,7 +311,7 @@ def test_ovr_fit_predict_svc(): def test_ovr_multilabel_dataset(): base_clf = MultinomialNB(alpha=1) for au, prec, recall, specificity in zip( - (True, False), (0.51, 0.66), (0.51, 0.80) + (True, False), (0.51, 0.66), (0.51, 0.80), (0.66, 0.71) ): X, Y = datasets.make_multilabel_classification(n_samples=100, n_features=20, From 2e8efc957a911b999b65245a7b08080775749acf Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 25 Feb 2021 22:13:10 +0100 Subject: [PATCH 028/127] Test bug fixes --- sklearn/metrics/tests/test_classification.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 2909ec89d63f0..c99d2568d1263 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2118,7 +2118,7 @@ def test_specificity_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Tnr is ill-defined and ' - 'being set to 0.0 due to no negative samples.' + 'being set to 0.0 due to no negatives samples.' ' Use `zero_division` parameter to control' ' this behavior.') else: @@ -2128,7 +2128,7 @@ def test_specificity_warnings(zero_division): if zero_division == "warn": assert (str(record.pop().message) == 'Tnr is ill-defined and ' - 'being set to 0.0 due to no negative samples.' + 'being set to 0.0 due to no negatives samples.' 
' Use `zero_division` parameter to control' ' this behavior.') diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 263800847ce6c..b3df79b6b913e 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -359,7 +359,9 @@ def test_classification_binary_scores(scorer_name, metric): ('jaccard_weighted', partial(jaccard_score, average='weighted')), ('jaccard_macro', partial(jaccard_score, average='macro')), ('jaccard_micro', partial(jaccard_score, average='micro')), - ('specificity', specificity_score) + ('specificity_weighted', partial(specificity_score, average='weighted')), + ('specificity_macro', partial(specificity_score, average='macro')), + ('specificity_micro', partial(specificity_score, average='micro')) ]) def test_classification_multiclass_scores(scorer_name, metric): # check consistency between score and scorer for scores supporting From 867e28756d55414bd8b4934c462e71f1f96071c0 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sun, 18 Apr 2021 10:58:35 +0200 Subject: [PATCH 029/127] Update v1.0.rst --- doc/whats_new/v1.0.rst | 208 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 187 insertions(+), 21 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 5f093b7e9e8ee..b1c191947927e 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -45,6 +45,44 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123456 is the *pull request* number, not the issue number. +- |API| The option for using the squared error via ``loss`` and + ``criterion`` parameters was made more consistent. The preferred way is by + setting the value to `"squared_error"`. Old option names are still valid, + produce the same models, but are deprecated and will be removed in version + 1.2. + :pr:`19310` by :user:`Christian Lorentzen `. + + - For :class:`ensemble.ExtraTreesRegressor`, `criterion="mse"` is deprecated, + use `"squared_error"` instead which is now the default. + + - For :class:`ensemble.GradientBoostingRegressor`, `loss="ls"` is deprecated, + use `"squared_error"` instead which is now the default. + + - For :class:`ensemble.RandomForestRegressor`, `criterion="mse"` is deprecated, + use `"squared_error"` instead which is now the default. + + - For :class:`ensemble.HistGradientBoostingRegressor`, `loss="least_squares"` + is deprecated, use `"squared_error"` instead which is now the default. + + - For :class:`linear_model.RANSACRegressor`, `loss="squared_loss"` is + deprecated, use `"squared_error"` instead. + + - For :class:`linear_model.SGDRegressor`, `loss="squared_loss"` is + deprecated, use `"squared_error"` instead which is now the default. + + - For :class:`tree.DecisionTreeRegressor`, `criterion="mse"` is deprecated, + use `"squared_error"` instead which is now the default. + + - For :class:`tree.ExtraTreeRegressor`, `criterion="mse"` is deprecated, + use `"squared_error"` instead which is now the default. + +:mod:`sklearn.calibration` +.......................... + +- |Fix| The predict and predict_proba methods of + :class:`calibration.CalibratedClassifierCV` can now properly be used on + prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre `. + :mod:`sklearn.cluster` ...................... @@ -57,9 +95,46 @@ Changelog in multicore settings. :pr:`19052` by :user:`Yusuke Nagasaka `. +- |Efficiency| :class:`cluster.MiniBatchKMeans` is now faster in multicore + settings. :pr:`17622` by :user:`Jérémie du Boisberranger `. 
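A minimal usage sketch of the scorer strings added earlier in this series may help here. The `_scorer.py` and `test_score_objects.py` changes above register `'specificity'`, `'specificity_macro'`, `'specificity_micro'`, `'specificity_samples'` and `'specificity_weighted'`; the snippet below assumes a scikit-learn build that includes these patches, since none of these scoring strings exist in released versions.

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score

    X, y = make_classification(n_samples=200, random_state=0)

    # 'specificity' resolves to make_scorer(specificity_score, average='binary'),
    # mirroring the existing 'precision', 'recall' and 'jaccard' entries; the
    # averaged variants come from the same registration loop in _scorer.py.
    scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                             scoring="specificity", cv=5)
    print(scores.mean())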
+ +- |Fix| Fixed a bug in :class:`cluster.MiniBatchKMeans` where the sample + weights were partially ignored when the input is sparse. :pr:`17622` by + :user:`Jérémie du Boisberranger `. + +- |Fix| Improved convergence detection based on center change in + :class:`cluster.MiniBatchKMeans` which was almost never achievable. + :pr:`17622` by :user:`Jérémie du Boisberranger `. + +- |FIX| :class:`cluster.AgglomerativeClustering` now supports readonly + memory-mapped datasets. :pr:`19883` by `Julien Jerphanion `. + - |API| :class:`cluster.Birch` attributes, `fit_` and `partial_fit_`, are deprecated and will be removed in 1.2. :pr:`19297` by `Thomas Fan`_. +- |API| the default value for the `batch_size` parameter of + :class:`MiniBatchKMeans` was changed from 100 to 1024 due to efficiency + reasons. The `n_iter_` attribute of :class:`MiniBatchKMeans` now reports the + number of started epochs and the `n_steps_` attribute reports the number of + mini batches processed. :pr:`17622` + by :user:`Jérémie du Boisberranger `. + +:mod:`sklearn.compose` +...................... + +- |Enhancement| :class:`compose.ColumnTransformer` now records the output + of each transformer in `output_indices_`. :pr:`18393` by + :user:`Luca Bittarello `. + +- |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports + non-string feature names returned by any of its transformers. + :pr:`18459` by :user:`Albert Villanova del Moral ` and + :user:`Alonso Silva Allende `. + +- |Fix| :class:`compose.TransformedTargetRegressor` now takes nD targets with + an adequate transformer. + :pr:`18898` by :user:`Oras Phongpanagnam `. + :mod:`sklearn.datasets` ....................... @@ -68,6 +143,9 @@ Changelog `Thomas Fan`_ and :user:`Amanda Dsouza ` and :user:`EL-ATEIF Sara `. +- |Enhancement| :func:`datasets.fetch_kddcup99` raises a better message + when the cached file is invalid. :pr:`19669` `Thomas Fan`_. + :mod:`sklearn.decomposition` ............................ @@ -81,11 +159,25 @@ Changelog - |Fix| Fixes incorrect multiple data-conversion warnings when clustering boolean data. :pr:`19046` by :user:`Surya Prakash `. +:mod:`sklearn.decomposition` +............................ + - |Fix| Fixed :func:`dict_learning`, used by :class:`DictionaryLearning`, to ensure determinism of the output. Achieved by flipping signs of the SVD output which is used to initialize the code. :pr:`18433` by :user:`Bruno Charron `. +- |Fix| Fixed a bug in :class:`MiniBatchDictionaryLearning`, + :class:`MiniBatchSparsePCA` and :func:`dict_learning_online` where the + update of the dictionary was incorrect. :pr:`19198` by + :user:`Jérémie du Boisberranger `. + +- |Fix| Fixed a bug in :class:`DictionaryLearning`, :class:`SparsePCA`, + :class:`MiniBatchDictionaryLearning`, :class:`MiniBatchSparsePCA`, + :func:`dict_learning` and :func:`dict_learning_online` where the restart of + unused atoms during the dictionary update was not working as expected. + :pr:`19198` by :user:`Jérémie du Boisberranger `. + :mod:`sklearn.ensemble` ....................... @@ -96,6 +188,15 @@ Changelog target. Additional private refactoring was performed. :pr:`19162` by :user:`Guillaume Lemaitre `. +- |Enhancement| :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and + :class:`~sklearn.ensemble.HistGradientBoostingRegressor` are no longer + experimental. They are now considered stable and are subject to the same + deprecation cycles as all other estimators. :pr:`19799` by `Nicolas Hug`_. 
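A short, self-contained check of the quantity this series reports as specificity may also be useful while reading the changelog. The snippet below uses only metrics that already exist in released scikit-learn; the final comment about `specificity_score` itself is an expectation based on the `tn / (tn + fp)` definition given in the docstrings earlier in this series, not output from a released version.

    import numpy as np
    from sklearn.metrics import confusion_matrix, recall_score

    y_true = [0, 1, 0, 0, 1, 1, 0, 1]
    y_pred = [0, 1, 1, 0, 0, 1, 0, 1]

    # For binary targets, confusion_matrix(...).ravel() yields tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    manual_tnr = tn / (tn + fp)            # 3 / (3 + 1) = 0.75

    # Specificity of the positive class equals the recall of the negative
    # class, so the same number is already reachable via recall_score:
    assert np.isclose(manual_tnr, recall_score(y_true, y_pred, pos_label=0))

    # With this branch installed, specificity_score(y_true, y_pred) is
    # expected to return the same 0.75 (average='binary', pos_label=1).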
+ +- |Enhancement| Improve the HTML rendering of the + :class:`ensemble.StackingClassifier` and :class:`ensemble.StackingRegressor`. + :pr:`19564` by `Thomas Fan`_. + :mod:`sklearn.feature_extraction` ................................. @@ -113,6 +214,13 @@ Changelog :mod:`sklearn.linear_model` ........................... +- |Feature| The new :class:`linear_model.SGDOneClassSVM` provides an SGD + implementation of the linear One-Class SVM. Combined with kernel + approximation techniques, this implementation approximates the solution of + a kernelized One Class SVM while benefitting from a linear + complexity in the number of samples. + :pr:`10027` by :user:`Albert Thomas `. + - |Efficiency| The implementation of :class:`linear_model.LogisticRegression` has been optimised for dense matrices when using `solver='newton-cg'` and `multi_class!='multinomial'`. @@ -128,23 +236,23 @@ Changelog not corresponding to their objective. :pr:`19172` by :user:`Mathurin Massias ` -:mod:`sklearn.preprocessing` -............................ - -- |Feature| :class:`preprocessing.OrdinalEncoder` supports passing through - missing values by default. :pr:`19069` by `Thomas Fan`_. - - |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` is deprecated and will be removed in 1.2. Motivation for this deprecation: ``normalize`` parameter did not take any effect if ``fit_intercept`` was set to False and therefore was deemed confusing. - The behavior of the deprecated LinearRegression(normalize=True) can be + The behavior of the deprecated LinearModel(normalize=True) can be reproduced with :class:`~sklearn.pipeline.Pipeline` with - :class:`~sklearn.preprocessing.StandardScaler`as follows: - make_pipeline(StandardScaler(with_mean=False), LinearRegression()). + :class:`~sklearn.preprocessing.LinearModel` (where LinearModel is + LinearRegression, Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV) as + follows: + make_pipeline(StandardScaler(with_mean=False), LinearModel()). + LinearRegression was deprecated in: :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Alexandre Gramfort `. + Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV were deprecated in: + :pr:`17772` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. - |Fix|: `sample_weight` are now fully taken into account in linear models when `normalize=True` for both feature centering and feature @@ -152,6 +260,10 @@ Changelog :pr:`19426` by :user:`Alexandre Gramfort ` and :user:`Maria Telenczuk `. +- |Efficiency| The implementation of `fit` for `PolynomialFeatures` transformer + is now faster. This is especially noticeable on large sparse input. + :pr:`19734` by :user:`Fred Robinson `. + :mod:`sklearn.manifold` ....................... @@ -178,14 +290,35 @@ Changelog - |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for quantile regression. :pr:`19415` by :user:`Xavier Dupré ` and :user:`Oliver Grisel `. + +- |Feature| Add :func:`metrics.specificity_score`. + :pr:`19556` by :user:`Hao Chun Chang ` + and :user:`Pawel Kranzberg `. - |Efficiency| Improved speed of :func:`metrics.confusion_matrix` when labels are integral. :pr:`9843` by :user:`Jon Crall `. -- |Enhancement| Add :func:`metrics.specificity_score`. - :pr:`19556` by :user:`Hao Chun Chang ` - and :user:`Pawel Kranzberg `. +- |Fix| Samples with zero `sample_weight` values do not affect the results + from :func:`metrics.det_curve`, :func:`metrics.precision_recall_curve` + and :func:`metrics.roc_curve`. 
+ :pr:`18328` by :user:`Albert Villanova del Moral ` and + :user:`Alonso Silva Allende `. + +:mod:`sklearn.model_selection` +.............................. + +- |Feature| added :class:`model_selection.StratifiedGroupKFold`, that combines + :class:`model_selection.StratifiedKFold` and `model_selection.GroupKFold`, + providing an ability to split data preserving the distribution of classes in + each split while keeping each group within a single split. + :pr:`18649` by `Leandro Hermida ` and + `Rodion Martynov `. + +- |Fix| The `fit` method of the successive halving parameter search + (:class:`model_selection.HalvingGridSearchCV`, and + :class:`model_selection.HalvingRandomSearchCV`) now correctly handles the + `groups` parameter. :pr:`19847` by :user:`Xiaoyu Chai `. :mod:`sklearn.naive_bayes` .......................... @@ -202,6 +335,27 @@ Changelog Use ``var_`` instead. :pr:`18842` by :user:`Hong Shao Yang `. + +:mod:`sklearn.neighbors` +.......................... + +- |Enhancement| The creation of :class:`neighbors.KDTree` and + :class:`neighbors.BallTree` has been improved for their worst-cases time + complexity from :math:`\mathcal{O}(n^2)` to :math:`\mathcal{O}(n)`. + :pr:`19473` by :user:`jiefangxuanyan ` and + :user:`Julien Jerphanion `. + +- |FIX| :class:`neighbors.DistanceMetric` subclasses now support readonly + memory-mapped datasets. :pr:`19883` by `Julien Jerphanion `. + +:mod:`sklearn.pipeline` +....................... + +- |API| The `predict_proba` and `predict_log_proba` methods of the + :class:`Pipeline` class now support passing prediction kwargs to + the final estimator. + :pr:`19790` by :user:`Christopher Flynn `. + :mod:`sklearn.preprocessing` ............................ @@ -214,12 +368,27 @@ Changelog splines via the ``extrapolation`` argument. :pr:`19483` by :user:`Malte Londschien `. +- |Feature| :class:`preprocessing.OrdinalEncoder` supports passing through + missing values by default. :pr:`19069` by `Thomas Fan`_. + - |Fix| :func:`preprocessing.scale`, :class:`preprocessing.StandardScaler` and similar scalers detect near-constant features to avoid scaling them to very large values. This problem happens in particular when using a scaler on sparse data with a constant column with sample weights, in which case centering is typically disabled. :pr:`19527` by :user:`Oliver Grisel - ` and :user:`Maria Telenczuk `. + ` and :user:`Maria Telenczuk ` and :pr:`19788` by + :user:`Jérémie du Boisberranger `. + +- |Fix| :meth:`preprocessing.StandardScaler.inverse_transform` now + correctly handles integer dtypes. :pr:`19356` by :user:`makoeppel`. + +- |Feature| :class:`preprocessing.OneHotEncoder` now supports + `handle_unknown='ignore'` and dropping categories. :pr:`19041` by + `Thomas Fan`_. + +- |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` is not + supporting sparse matrix and raise the appropriate error message. + :pr:`19879` by :user:`Guillaume Lemaitre `. :mod:`sklearn.tree` ................... @@ -231,18 +400,15 @@ Changelog :mod:`sklearn.utils` .................... -- |Enhancement| Deprecated the default value of the `random_state=0` in +- |Enhancement| Deprecated the default value of the `random_state=0` in :func:`~sklearn.utils.extmath.randomized_svd`. Starting in 1.2, the default value of `random_state` will be set to `None`. - :pr:`19459` by :user:`Cindy Bezuidenhout ` and + :pr:`19459` by :user:`Cindy Bezuidenhout ` and :user:`Clifford Akai-Nettey`. -:mod:`sklearn.calibration` -............................ 
- -- |Fix| The predict and predict_proba methods of - :class:`calibration.CalibratedClassifierCV can now properly be used on - prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre ` +- |Fix| Fixed a bug in :func:`utils.sparsefuncs.mean_variance_axis` where the + precision of the computed variance was very poor when the real variance is + exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger `. Code and Documentation Contributors ----------------------------------- From f29b0c930a14899b0616c9670dfb8582c1f5298b Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 22 Apr 2021 12:04:49 +0200 Subject: [PATCH 030/127] Add `metrics.fpr_tpr_fnr_tnr_scores` to v1.0.rst --- doc/whats_new/v1.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 12bc071b95d3c..bb532db1d1b86 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -291,7 +291,8 @@ Changelog quantile regression. :pr:`19415` by :user:`Xavier Dupré ` and :user:`Oliver Grisel `. -- |Feature| Add :func:`metrics.specificity_score`. +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores` + and :func:`metrics.specificity_score`. :pr:`19556` by :user:`Hao Chun Chang ` and :user:`Pawel Kranzberg `. From 6311b30709126baa73c195a3fcd64961f74d8cb1 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 22 Apr 2021 15:12:58 +0200 Subject: [PATCH 031/127] tpr, fpr, tnr, fnr to upper case --- sklearn/metrics/_classification.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index f72b5da6c82db..91ce3b8267b11 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1719,13 +1719,13 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, # Divide, and on zero-division, set scores and/or warn according to # zero_division: - tpr = _prf_divide(tp_sum, pos_sum, 'tpr', 'positives', + tpr = _prf_divide(tp_sum, pos_sum, 'TPR', 'positives', average, warn_for, zero_division) - fpr = _prf_divide(fp_sum, neg_sum, 'fpr', 'negatives', + fpr = _prf_divide(fp_sum, neg_sum, 'FPR', 'negatives', average, warn_for, zero_division) - tnr = _prf_divide(tn_sum, neg_sum, 'tnr', 'negatives', + tnr = _prf_divide(tn_sum, neg_sum, 'TNR', 'negatives', average, warn_for, zero_division) - fnr = _prf_divide(fn_sum, pos_sum, 'fnr', 'positives', + fnr = _prf_divide(fn_sum, pos_sum, 'FNR', 'positives', average, warn_for, zero_division) # Average the results if average == 'weighted': From 50125d72888a9d35d6b2e478ed3a309735c507b0 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 22 Apr 2021 15:57:04 +0200 Subject: [PATCH 032/127] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 88 ++++++++++---------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 202dffaaef924..e86fe53d63de4 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -424,49 +424,64 @@ def test_tpr_fpr_tnr_fnr_scores_multiclass(): assert_array_almost_equal(fnr, [0.21, 0.1, 0.9], 2) -@ignore_warnings @pytest.mark.parametrize('zero_division', ["warn", 0, 1]) def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) + + 
pytest.warns(Warning if zero_division == "warn" else None) - zero_division = 1.0 if zero_division == 1.0 else 0.0 - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, - average=None, - zero_division=zero_division) - assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division], 2) + zero_division_value = 1.0 if zero_division == 1.0 else 0.0 + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, y_pred, + average=None, + zero_division=zero_division + ) + assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division_value], 2) assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) - assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division], 2) + assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division_value], 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, - average="macro", - zero_division=zero_division) - assert_almost_equal(tpr, 0.625 if zero_division else 0.375) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, + y_pred, + average="macro", + zero_division=zero_division + ) + assert_almost_equal(tpr, 0.625 if zero_division_value else 0.375) assert_almost_equal(fpr, 1 / 3.0 / 4.0) assert_almost_equal(tnr, 0.91666, 5) - assert_almost_equal(fnr, 0.625 if zero_division else 0.375) + assert_almost_equal(fnr, 0.625 if zero_division_value else 0.375) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, - average="micro", - zero_division=zero_division) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, + y_pred, + average="micro", + zero_division=zero_division + ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.125) assert_almost_equal(tnr, 0.875) assert_almost_equal(fnr, 0.5) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, - average="weighted", - zero_division=zero_division) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, + y_pred, + average="weighted", + zero_division=zero_division + ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0) assert_almost_equal(tnr, 1.0) assert_almost_equal(fnr, 0.5) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, - average="samples", - sample_weight=[1, 1, 2], - zero_division=zero_division) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + y_true, + y_pred, + average="samples", + sample_weight=[1, 1, 2], + zero_division=zero_division + ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.08333, 5) assert_almost_equal(tnr, 0.91666, 5) @@ -2111,27 +2126,16 @@ def test_specificity_warnings(zero_division): np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro', zero_division=zero_division) - with warnings.catch_warnings(record=True) as record: - warnings.simplefilter('always') - specificity_score(np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), - average='micro', zero_division=zero_division) - if zero_division == "warn": - assert (str(record.pop().message) == - 'Tnr is ill-defined and ' - 'being set to 0.0 due to no negatives samples.' - ' Use `zero_division` parameter to control' - ' this behavior.') - else: - assert len(record) == 0 - specificity_score([1, 1], [1, 1]) - if zero_division == "warn": - assert (str(record.pop().message) == - 'Tnr is ill-defined and ' - 'being set to 0.0 due to no negatives samples.' 
- ' Use `zero_division` parameter to control' - ' this behavior.') + specificity_score(np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), + average='micro', zero_division=zero_division) + if zero_division == "warn": + pytest.warns(Warning if zero_division == "warn" else None) + + specificity_score([1, 1], [1, 1]) + if zero_division == "warn": + pytest.warns(Warning if zero_division == "warn" else None) def test_prf_average_binary_data_non_binary(): From bae4c360dc3e3e73a88fcb3fc29ff9b6ff178e5a Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 22 Apr 2021 15:59:59 +0200 Subject: [PATCH 033/127] Linting fix --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e86fe53d63de4..43726aa8bda99 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -428,7 +428,7 @@ def test_tpr_fpr_tnr_fnr_scores_multiclass(): def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) - + pytest.warns(Warning if zero_division == "warn" else None) zero_division_value = 1.0 if zero_division == 1.0 else 0.0 From 5a391fa47c7fbe3328657073cd89475f86506efc Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 22 Apr 2021 16:58:31 +0200 Subject: [PATCH 034/127] Typo fix --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 91ce3b8267b11..976a709eb38d8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2510,7 +2510,7 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) \ + y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` From 07b9a6834e25ea9cb6febe029ad5fd8be7980942 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Fri, 23 Apr 2021 07:48:35 +0200 Subject: [PATCH 035/127] Revert "Typo fix" This reverts commit 5a391fa47c7fbe3328657073cd89475f86506efc. --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 976a709eb38d8..91ce3b8267b11 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2510,7 +2510,7 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) + y_pred : array-like of float, shape = (n_samples, n_classes) \ or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` From 366a1132eeacf6651d2a66610e3e60a040230f13 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Fri, 23 Apr 2021 09:27:04 +0200 Subject: [PATCH 036/127] retrigger checks From 1e5159fdc7fb447f01776318814962a882ea4449 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sun, 25 Apr 2021 16:59:59 +0200 Subject: [PATCH 037/127] retrigger checks From 8fc7ec00dc471e78a064bde25ef298028543790d Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 27 Apr 2021 08:54:06 +0200 Subject: [PATCH 038/127] retrigger checks From 459708f166a730f1c0845e0b6611582cea02341e Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Fri, 30 Apr 2021 07:11:15 +0200 Subject: [PATCH 039/127] retrigger checks From b6167ec6f1b364e1d023ae0a49d2af307ecb06b7 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 11 Sep 2021 20:01:43 +0200 Subject: [PATCH 040/127] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index ef3d4bedc6d37..028b8ab51e77e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1567,11 +1567,11 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ - of shape (n_samples,) + of shape (n_samples,) Ground truth (correct) target values. y_pred : {array-like, label indicator array, sparse matrix} \ - of shape (n_samples,) + of shape (n_samples,) Estimated targets as returned by a classifier. labels : list, default=None @@ -1666,7 +1666,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 0.6666666666666666) - It is possible to compute per-label fpr, fnr, tnr, tpr and + It is possible to compute per-label FPR, FNR, TNR, TPR and supports instead of averaging: >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, @@ -1678,7 +1678,7 @@ def tpr_fpr_tnr_fnr_scores(y_true, y_pred, *, labels=None, pos_label=1, ----- When ``true positive + false negative == 0``, TPR, FNR are undefined; When ``true negative + false positive == 0``, FPR, TNR are undefined. - In such cases, by default the metric will be set to 0, as will f-score, + In such cases, by default the metric will be set to 0, as will F-score, and ``UndefinedMetricWarning`` will be raised. This behavior can be modified with ``zero_division``. """ @@ -2494,7 +2494,7 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, Ground truth (correct) labels for n_samples samples. y_pred : array-like of float, shape = (n_samples, n_classes) \ - or (n_samples,) + or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. 
If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the From 2b8efccf7a3c6d828bcf76a58cc28b136b0c8745 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 11:27:50 +0200 Subject: [PATCH 041/127] Fix recall --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 69f849e4266bb..54fc12f79aa61 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1748,7 +1748,7 @@ def precision_score( return p -recall_score( +def recall_score( y_true, y_pred, *, From bea1cb0c73274fe08003780998ba5a13f525dadd Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 12:08:47 +0200 Subject: [PATCH 042/127] Linting fixes --- sklearn/metrics/_classification.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 54fc12f79aa61..4631f3dd803ac 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2978,11 +2978,14 @@ def specificity_score( >>> specificity_score(y_true, y_pred, average=None, zero_division=1) array([1. , 0.66..., 0.83...]) """ - _, _, tnr, _ = tpr_fpr_tnr_fnr_scores(y_true, y_pred, - labels=labels, - pos_label=pos_label, - average=average, - warn_for=('tnr',), - sample_weight=sample_weight, - zero_division=zero_division) + _, _, tnr, _ = tpr_fpr_tnr_fnr_scores( + y_true, + y_pred, + labels=labels, + pos_label=pos_label, + average=average, + warn_for=("tnr",), + sample_weight=sample_weight, + zero_division=zero_division, + ) return tnr From d150e0679457070b738f2bc75a07a9b62954e57e Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 12:10:37 +0200 Subject: [PATCH 043/127] Fix linting Fix linting --- sklearn/metrics/_scorer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 558202581cabf..51945a7b71f42 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -787,8 +787,7 @@ def make_scorer( ("recall", recall_score), ("f1", f1_score), ("jaccard", jaccard_score), - ('specificity', - ), + ("specificity", specificity_score), ]: SCORERS[name] = make_scorer(metric, average="binary") for average in ["macro", "micro", "samples", "weighted"]: From cf36e6ac449e5544de1b1a2e73e3173fd256f0e5 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 12:16:22 +0200 Subject: [PATCH 044/127] Fix linting --- sklearn/metrics/tests/test_common.py | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index a1639c6ef089e..ff561ec796d4b 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -146,8 +146,9 @@ "f0.5_score": partial(fbeta_score, beta=0.5), "matthews_corrcoef_score": matthews_corrcoef, "tpr_fpr_tnr_fnr_scores": tpr_fpr_tnr_fnr_scores, - "binary_tpr_fpr_tnr_fnr_scores": - partial(tpr_fpr_tnr_fnr_scores, average="binary"), + "binary_tpr_fpr_tnr_fnr_scores": partial( + tpr_fpr_tnr_fnr_scores, average="binary" + ), "specificity_score": specificity_score, "binary_specificity_score": partial(specificity_score, average="binary"), "weighted_f0.5_score": partial(fbeta_score, average="weighted", beta=0.5), @@ -156,18 +157,21 @@ "weighted_precision_score": 
partial(precision_score, average="weighted"), "weighted_recall_score": partial(recall_score, average="weighted"), "weighted_jaccard_score": partial(jaccard_score, average="weighted"), - "weighted_tpr_fpr_tnr_fnr_scores": - partial(tpr_fpr_tnr_fnr_scores, average="weighted"), - "weighted_specificity_score": - partial(specificity_score, average="weighted"), + "weighted_tpr_fpr_tnr_fnr_scores": partial( + tpr_fpr_tnr_fnr_scores, average="weighted" + ), + "weighted_specificity_score": partial( + specificity_score, average="weighted" + ), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), "micro_f2_score": partial(fbeta_score, average="micro", beta=2), "micro_precision_score": partial(precision_score, average="micro"), "micro_recall_score": partial(recall_score, average="micro"), "micro_jaccard_score": partial(jaccard_score, average="micro"), - "micro_tpr_fpr_tnr_fnr_scores": - partial(tpr_fpr_tnr_fnr_scores, average="micro"), + "micro_tpr_fpr_tnr_fnr_scores": partial( + tpr_fpr_tnr_fnr_scores, average="micro" + ), "micro_specificity_score": partial(specificity_score, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), @@ -175,8 +179,9 @@ "macro_precision_score": partial(precision_score, average="macro"), "macro_recall_score": partial(recall_score, average="macro"), "macro_jaccard_score": partial(jaccard_score, average="macro"), - "macro_tpr_fpr_tnr_fnr_scores": - partial(tpr_fpr_tnr_fnr_scores, average="macro"), + "macro_tpr_fpr_tnr_fnr_scores": partial( + tpr_fpr_tnr_fnr_scores, average="macro" + ), "macro_specificity_score": partial(specificity_score, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), @@ -184,8 +189,9 @@ "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), "samples_jaccard_score": partial(jaccard_score, average="samples"), - "samples_tpr_fpr_tnr_fnr_scores": - partial(tpr_fpr_tnr_fnr_scores, average="samples"), + "samples_tpr_fpr_tnr_fnr_scores": partial( + tpr_fpr_tnr_fnr_scores, average="samples" + ), "samples_specificity_score": partial(specificity_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, } From aa76f66a08042a20bc01bf764f215b46a5b64ee1 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 12:28:09 +0200 Subject: [PATCH 045/127] Fix linting Fix linting --- sklearn/metrics/tests/test_classification.py | 80 +++++++------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 184a3b54c91c3..156730ae9e768 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -355,33 +355,25 @@ def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): y_true, y_pred, _ = make_prediction(binary=True) # compute scores with default labels introspection - tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average=None - ) + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) assert_array_almost_equal(tprs, [0.88, 0.68], 2) assert_array_almost_equal(fprs, [0.32, 0.12], 2) assert_array_almost_equal(tnrs, [0.68, 0.88], 2) assert_array_almost_equal(fnrs, [0.12, 0.32], 2) - tn, fp, fn, tp = assert_no_warnings( - 
confusion_matrix, y_true, y_pred - ).ravel() + tn, fp, fn, tp = assert_no_warnings(confusion_matrix, y_true, y_pred).ravel() assert_array_almost_equal(tp / (tp + fn), 0.68, 2) assert_array_almost_equal(fp / (tn + fp), 0.12, 2) assert_array_almost_equal(tn / (tn + fp), 0.88, 2) assert_array_almost_equal(fn / (tp + fn), 0.32, 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average='macro' - ) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="macro") assert tpr == np.mean(tprs) assert fpr == np.mean(fprs) assert tnr == np.mean(tnrs) assert fnr == np.mean(fnrs) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average='weighted' - ) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="weighted") support = np.bincount(y_true) assert tpr == np.average(tprs, weights=support) assert fpr == np.average(fprs, weights=support) @@ -394,34 +386,26 @@ def test_tpr_fpr_tnr_fnr_scores_multiclass(): y_true, y_pred, _ = make_prediction(binary=False) # compute scores with default labels introspection - tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average=None - ) + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) assert_array_almost_equal(tprs, [0.79, 0.1, 0.9], 2) assert_array_almost_equal(fprs, [0.08, 0.14, 0.45], 2) assert_array_almost_equal(tnrs, [0.92, 0.86, 0.55], 2) assert_array_almost_equal(fnrs, [0.21, 0.9, 0.1], 2) # averaging tests - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average='micro' - ) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="micro") assert_array_almost_equal(tpr, 0.53, 2) assert_array_almost_equal(fpr, 0.23, 2) assert_array_almost_equal(tnr, 0.77, 2) assert_array_almost_equal(fnr, 0.47, 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average='macro' - ) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="macro") assert_array_almost_equal(tpr, 0.6, 2) assert_array_almost_equal(fpr, 0.22, 2) assert_array_almost_equal(tnr, 0.78, 2) assert_array_almost_equal(fnr, 0.4, 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, average='weighted' - ) + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="weighted") assert_array_almost_equal(tpr, 0.53, 2) assert_array_almost_equal(fpr, 0.2, 2) assert_array_almost_equal(tnr, 0.8, 2) @@ -449,9 +433,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): zero_division_value = 1.0 if zero_division == 1.0 else 0.0 tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, y_pred, - average=None, - zero_division=zero_division + y_true, y_pred, average=None, zero_division=zero_division ) assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division_value], 2) assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) @@ -459,10 +441,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division_value], 2) tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, - y_pred, - average="macro", - zero_division=zero_division + y_true, y_pred, average="macro", zero_division=zero_division ) assert_almost_equal(tpr, 0.625 if zero_division_value else 0.375) assert_almost_equal(fpr, 1 / 3.0 / 4.0) @@ -470,10 +449,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_almost_equal(fnr, 0.625 if zero_division_value else 0.375) tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, - y_pred, - average="micro", 
- zero_division=zero_division + y_true, y_pred, average="micro", zero_division=zero_division ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.125) @@ -481,10 +457,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_almost_equal(fnr, 0.5) tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( - y_true, - y_pred, - average="weighted", - zero_division=zero_division + y_true, y_pred, average="weighted", zero_division=zero_division ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0) @@ -496,7 +469,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): y_pred, average="samples", sample_weight=[1, 1, 2], - zero_division=zero_division + zero_division=zero_division, ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.08333, 5) @@ -1067,8 +1040,7 @@ def test_zero_precision_recall(): assert_almost_equal(precision_score(y_true, y_pred, average="macro"), 0.0, 2) assert_almost_equal(recall_score(y_true, y_pred, average="macro"), 0.0, 2) assert_almost_equal(f1_score(y_true, y_pred, average="macro"), 0.0, 2) - assert_almost_equal( - specificity_score(y_true, y_pred, average='macro'), 0.5, 2) + assert_almost_equal(specificity_score(y_true, y_pred, average='macro'), 0.5, 2) finally: np.seterr(**old_error_settings) @@ -2217,16 +2189,22 @@ def test_fscore_warnings(zero_division): assert len(record) == 0 -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +@pytest.mark.parametrize("zero_division", ["warn", 0, 1]) def test_specificity_warnings(zero_division): - assert_no_warnings(specificity_score, - np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), - average='micro', zero_division=zero_division) - - specificity_score(np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), - average='micro', zero_division=zero_division) + assert_no_warnings( + specificity_score, + np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), + average="micro", + zero_division=zero_division, + ) + + specificity_score( + np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), + average="micro", + zero_division=zero_division, + ) if zero_division == "warn": pytest.warns(Warning if zero_division == "warn" else None) From 3e6ecd66a14e9351f6948710a43c92215d7a4b0a Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 12:36:51 +0200 Subject: [PATCH 046/127] Fix linting --- sklearn/metrics/tests/test_score_objects.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 64f90e0c4dadb..79450592a56e5 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1082,8 +1082,7 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): @pytest.mark.parametrize( - "score_func", [f1_score, precision_score, recall_score, jaccard_score, - specificity_score] + "score_func", [f1_score, precision_score, recall_score, jaccard_score, specificity_score] ) def test_non_symmetric_metric_pos_label( score_func, string_labeled_classification_problem From 54b5bfba5a16cc1fa0fe162575918b3d1e837edf Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 12:40:46 +0200 Subject: [PATCH 047/127] Fix linting Add specifity test to test_ovr_multilabel_dataset Fix linting --- sklearn/tests/test_multiclass.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 
d1c191dbf9199..03e6ec0540606 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -338,8 +338,8 @@ def test_ovr_fit_predict_svc(): def test_ovr_multilabel_dataset(): base_clf = MultinomialNB(alpha=1) for au, prec, recall, specificity in zip( - (True, False), (0.51, 0.66), (0.51, 0.80) - ): + (True, False), (0.51, 0.66), (0.51, 0.80), (0.66, 0.71) + ): X, Y = datasets.make_multilabel_classification( n_samples=100, n_features=20, @@ -362,8 +362,7 @@ def test_ovr_multilabel_dataset(): recall_score(Y_test, Y_pred, average="micro"), recall, decimal=2 ) assert_almost_equal( - specificity_score( - Y_test, Y_pred, average="micro"), specificity, decimal=2 + specificity_score(Y_test, Y_pred, average="micro"), specificity, decimal=2 ) From eb56ee0ba1faef78ee998f0692692f8f1b1b436b Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 15:04:40 +0200 Subject: [PATCH 048/127] Fix linting --- sklearn/metrics/tests/test_score_objects.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 79450592a56e5..72cbb9b8260d5 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1082,7 +1082,8 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): @pytest.mark.parametrize( - "score_func", [f1_score, precision_score, recall_score, jaccard_score, specificity_score] + "score_func", + [f1_score, precision_score, recall_score, jaccard_score, specificity_score], ) def test_non_symmetric_metric_pos_label( score_func, string_labeled_classification_problem From d2d150b10d71384d3011ef98e7d09adf6e9e467d Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 15:14:54 +0200 Subject: [PATCH 049/127] Fix linting --- sklearn/metrics/tests/test_common.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index ff561ec796d4b..249782f54efae 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -146,9 +146,7 @@ "f0.5_score": partial(fbeta_score, beta=0.5), "matthews_corrcoef_score": matthews_corrcoef, "tpr_fpr_tnr_fnr_scores": tpr_fpr_tnr_fnr_scores, - "binary_tpr_fpr_tnr_fnr_scores": partial( - tpr_fpr_tnr_fnr_scores, average="binary" - ), + "binary_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="binary"), "specificity_score": specificity_score, "binary_specificity_score": partial(specificity_score, average="binary"), "weighted_f0.5_score": partial(fbeta_score, average="weighted", beta=0.5), @@ -160,18 +158,14 @@ "weighted_tpr_fpr_tnr_fnr_scores": partial( tpr_fpr_tnr_fnr_scores, average="weighted" ), - "weighted_specificity_score": partial( - specificity_score, average="weighted" - ), + "weighted_specificity_score": partial(specificity_score, average="weighted"), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), "micro_f2_score": partial(fbeta_score, average="micro", beta=2), "micro_precision_score": partial(precision_score, average="micro"), "micro_recall_score": partial(recall_score, average="micro"), "micro_jaccard_score": partial(jaccard_score, average="micro"), - "micro_tpr_fpr_tnr_fnr_scores": partial( - tpr_fpr_tnr_fnr_scores, average="micro" - ), + "micro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="micro"), 
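The test_common.py registry being reflowed here binds each averaging mode to the metric with functools.partial, so every entry behaves like a plain ``metric(y_true, y_pred)`` callable that the common tests can exercise uniformly. A minimal sketch of that pattern, shown with the already-released recall_score because tpr_fpr_tnr_fnr_scores and specificity_score only exist with this patch applied (the REGISTRY dict and sample arrays below are illustrative only):

    from functools import partial

    import numpy as np
    from sklearn.metrics import recall_score

    # Each entry pre-binds the averaging mode, mirroring the "macro_*",
    # "micro_*" and "weighted_*" entries registered in test_common.py.
    REGISTRY = {
        "macro_recall_score": partial(recall_score, average="macro"),
        "micro_recall_score": partial(recall_score, average="micro"),
        "weighted_recall_score": partial(recall_score, average="weighted"),
    }

    y_true = np.array([0, 1, 2, 0, 1, 2])
    y_pred = np.array([0, 2, 1, 0, 0, 1])
    for name, metric in REGISTRY.items():
        # Every registered callable now takes only (y_true, y_pred).
        print(name, metric(y_true, y_pred))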
"micro_specificity_score": partial(specificity_score, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), @@ -179,9 +173,7 @@ "macro_precision_score": partial(precision_score, average="macro"), "macro_recall_score": partial(recall_score, average="macro"), "macro_jaccard_score": partial(jaccard_score, average="macro"), - "macro_tpr_fpr_tnr_fnr_scores": partial( - tpr_fpr_tnr_fnr_scores, average="macro" - ), + "macro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="macro"), "macro_specificity_score": partial(specificity_score, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), From 66a6bb40cc71448de4d889ba101997cc5c1d45b7 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 15:21:23 +0200 Subject: [PATCH 050/127] Fix linting --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 156730ae9e768..b148fc376f13b 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -424,7 +424,7 @@ def test_tpr_fpr_tnr_fnr_scores_multiclass(): assert_array_almost_equal(fnr, [0.21, 0.1, 0.9], 2) -@pytest.mark.parametrize('zero_division', ["warn", 0, 1]) +@pytest.mark.parametrize("zero_division", ["warn", 0, 1]) def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) From ff585f16126387008a491cc52f46a3adecf8d0cd Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 15:27:47 +0200 Subject: [PATCH 051/127] Fix linting Fix linting --- sklearn/metrics/_classification.py | 61 +++++++++++++++++------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 4631f3dd803ac..6db1e4d408b72 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -544,8 +544,7 @@ def multilabel_confusion_matrix( raise ValueError( "All labels must be in [0, n labels) for " "multilabel targets. 
" - "Got %d < 0" - % np.min(labels) + "Got %d < 0" % np.min(labels) ) if n_labels is not None: @@ -2659,9 +2658,9 @@ def tpr_fpr_tnr_fnr_scores( labels=None, pos_label=1, average=None, - warn_for=('tpr', 'fpr', 'tnr', 'fnr'), + warn_for=("tpr", "fpr", "tnr", "fnr"), sample_weight=None, - zero_division="warn" + zero_division="warn", ): """Compute True Positive Rate (TPR), False Positive Rate (FPR),\ True Negative Rate (TNR), False Negative Rate (FNR) for each class @@ -2806,10 +2805,14 @@ def tpr_fpr_tnr_fnr_scores( labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pos_sum, neg_sum - samplewise = average == 'samples' - MCM = multilabel_confusion_matrix(y_true, y_pred, - sample_weight=sample_weight, - labels=labels, samplewise=samplewise) + samplewise = average == "samples" + MCM = multilabel_confusion_matrix( + y_true, + y_pred, + sample_weight=sample_weight, + labels=labels, + samplewise=samplewise, + ) tn_sum = MCM[:, 0, 0] fp_sum = MCM[:, 0, 1] fn_sum = MCM[:, 1, 0] @@ -2817,7 +2820,7 @@ def tpr_fpr_tnr_fnr_scores( neg_sum = tn_sum + fp_sum pos_sum = fn_sum + tp_sum - if average == 'micro': + if average == "micro": tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) tn_sum = np.array([tn_sum.sum()]) @@ -2827,33 +2830,39 @@ def tpr_fpr_tnr_fnr_scores( # Divide, and on zero-division, set scores and/or warn according to # zero_division: - tpr = _prf_divide(tp_sum, pos_sum, 'TPR', 'positives', - average, warn_for, zero_division) - fpr = _prf_divide(fp_sum, neg_sum, 'FPR', 'negatives', - average, warn_for, zero_division) - tnr = _prf_divide(tn_sum, neg_sum, 'TNR', 'negatives', - average, warn_for, zero_division) - fnr = _prf_divide(fn_sum, pos_sum, 'FNR', 'positives', - average, warn_for, zero_division) + tpr = _prf_divide( + tp_sum, pos_sum, "TPR", "positives", average, warn_for, zero_division + ) + fpr = _prf_divide( + fp_sum, neg_sum, "FPR", "negatives", average, warn_for, zero_division + ) + tnr = _prf_divide( + tn_sum, neg_sum, "TNR", "negatives", average, warn_for, zero_division + ) + fnr = _prf_divide( + fn_sum, pos_sum, "FNR", "positives", average, warn_for, zero_division + ) # Average the results - if average == 'weighted': + if average == "weighted": weights = pos_sum if weights.sum() == 0: zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 # TPR and FNR is zero_division if there are no positive labels # FPR and TNR is zero_division if there are no negative labels - return (zero_division_value if pos_sum.sum() == 0 else 0, - zero_division_value if neg_sum.sum() == 0 else 0, - zero_division_value if neg_sum.sum() == 0 else 0, - zero_division_value if pos_sum.sum() == 0 else 0) + return ( + zero_division_value if pos_sum.sum() == 0 else 0, + zero_division_value if neg_sum.sum() == 0 else 0, + zero_division_value if neg_sum.sum() == 0 else 0, + zero_division_value if pos_sum.sum() == 0 else 0, + ) - elif average == 'samples': + elif average == "samples": weights = sample_weight else: weights = None if average is not None: - assert average != 'binary' or len(fpr) == 1 + assert average != "binary" or len(fpr) == 1 fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) @@ -2867,9 +2876,9 @@ def specificity_score( *, labels=None, pos_label=1, - average='binary', + average="binary", sample_weight=None, - zero_division="warn" + zero_division="warn", ): """Compute specificity, also known as true negative rate. 
From ff791599381f48cb1f8bbc626d362383a97d889c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 15:41:16 +0200 Subject: [PATCH 052/127] Fix linting --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index b148fc376f13b..431b8c258eeb5 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1040,7 +1040,7 @@ def test_zero_precision_recall(): assert_almost_equal(precision_score(y_true, y_pred, average="macro"), 0.0, 2) assert_almost_equal(recall_score(y_true, y_pred, average="macro"), 0.0, 2) assert_almost_equal(f1_score(y_true, y_pred, average="macro"), 0.0, 2) - assert_almost_equal(specificity_score(y_true, y_pred, average='macro'), 0.5, 2) + assert_almost_equal(specificity_score(y_true, y_pred, average="macro"), 0.5, 2) finally: np.seterr(**old_error_settings) From 83f2788bf30e94e3e3a308b23e6ae807d08f34de Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 18:14:20 +0200 Subject: [PATCH 053/127] Fix docs formatting --- sklearn/metrics/_classification.py | 98 ++++++++++++++---------------- 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 6db1e4d408b72..aab7722335ee9 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2662,25 +2662,24 @@ def tpr_fpr_tnr_fnr_scores( sample_weight=None, zero_division="warn", ): - """Compute True Positive Rate (TPR), False Positive Rate (FPR),\ - True Negative Rate (TNR), False Negative Rate (FNR) for each class + """Compute TPR, FPR, TNR, FNR for each class. - The TPR is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of - true positives and ``fn`` the number of false negatives. + The TPR is the ratio `tp / (tp + fn)` where `tp` is the number of + true positives and `fn` the number of false negatives. - The FPR is the ratio ``fp / (tn + fp)`` where ``tn`` is the number of - true negatives and ``fp`` the number of false positives. + The FPR is the ratio `fp / (tn + fp)` where `tn` is the number of + true negatives and `fp` the number of false positives. - The TNR is the ratio ``tn / (tn + fp)`` where ``tn`` is the number of - true negatives and ``fp`` the number of false positives. + The TNR is the ratio `tn / (tn + fp)` where `tn` is the number of + true negatives and `fp` the number of false positives. - The FNR is the ratio ``fn / (tp + fn)`` where ``tp`` is the number of - true positives and ``fn`` the number of false negatives. + The FNR is the ratio `fn / (tp + fn)` where `tp` is the number of + true positives and `fn` the number of false negatives. - If ``pos_label is None`` and in binary classification, this function + If `pos_label is None` and in binary classification, this function returns the true positive rate, false positive rate, true negative rate - and false negative rate if ``average`` is one of ``'micro'``, ``'macro'``, - ``'weighted'`` or ``'samples'``. + and false negative rate if `average` is one of `"micro"`, `"macro"`, + `"weighted"` or `"samples"`. Parameters ---------- @@ -2702,30 +2701,30 @@ def tpr_fpr_tnr_fnr_scores( ``y_pred`` are used in sorted order. pos_label : str or int, default=1 - The class to report if ``average='binary'`` and the data is binary. + The class to report if `average="binary"` and the data is binary. 
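For reference alongside the four ratio definitions above: the reformatted tpr_fpr_tnr_fnr_scores body earlier in this series derives all four rates per class from multilabel_confusion_matrix and then averages them. The following is a rough standalone sketch of that computation using only released scikit-learn APIs; it deliberately omits the _prf_divide zero-division and warning handling, the "samples" average, and the binary/pos_label handling that the real implementation performs:

    import numpy as np
    from sklearn.metrics import multilabel_confusion_matrix

    def rates(y_true, y_pred, average=None):
        # One 2x2 matrix per class: [[tn, fp], [fn, tp]].
        MCM = multilabel_confusion_matrix(y_true, y_pred)
        tn, fp = MCM[:, 0, 0], MCM[:, 0, 1]
        fn, tp = MCM[:, 1, 0], MCM[:, 1, 1]
        if average == "micro":
            # Pool the counts over all classes before dividing.
            tn, fp, fn, tp = tn.sum(), fp.sum(), fn.sum(), tp.sum()
        tpr, fpr = tp / (tp + fn), fp / (tn + fp)
        tnr, fnr = tn / (tn + fp), fn / (tp + fn)
        if average == "macro":
            # Unweighted mean of the per-class rates.
            return tpr.mean(), fpr.mean(), tnr.mean(), fnr.mean()
        if average == "weighted":
            # Per-class rates weighted by support (tp + fn).
            weights = tp + fn
            return tuple(np.average(r, weights=weights)
                         for r in (tpr, fpr, tnr, fnr))
        return tpr, fpr, tnr, fnr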
If the data are multiclass or multilabel, this will be ignored; - setting ``labels=[pos_label]`` and ``average != 'binary'`` will report + setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. - average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'}, \ + average : str, {None, "binary", "micro", "macro", "samples", "weighted"}, \ default=None - If ``None``, the scores for each class are returned. Otherwise, this + If `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - ``'binary'``: - Only report results for the class specified by ``pos_label``. - This is applicable only if targets (``y_{true,pred}``) are binary. - ``'micro'``: + `"binary"`: + Only report results for the class specified by `pos_label`. + This is applicable only if targets (`y_{true,pred}`) are binary. + `"micro"`: Calculate metrics globally by counting the total true positives, false negatives and false positives. - ``'macro'``: + `"macro"`: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. - ``'weighted'``: + `"weighted"`: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance. - ``'samples'``: + `"samples"`: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). @@ -2737,7 +2736,7 @@ def tpr_fpr_tnr_fnr_scores( sample_weight : array-like of shape (n_samples,), default=None Sample weights. - zero_division : str or int, {'warn', 0, 1}, default="warn" + zero_division : str or int, {"warn", 0, 1}, default="warn" Sets the value to return when there is a zero division: - tpr, fnr: when there are no positive labels - fpr, tnr: when there are no negative labels @@ -2757,7 +2756,7 @@ def tpr_fpr_tnr_fnr_scores( fnr : float (if average is not None), \ or ndarray of shape (n_unique_labels,) - The number of occurrences of each label in ``y_true``. + The number of occurrences of each label in `y_true`. References ---------- @@ -2775,14 +2774,11 @@ def tpr_fpr_tnr_fnr_scores( >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, - 0.6666666666666666) + (0.33..., 0.33..., 0.66..., 0.66...) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, - 0.6666666666666666) + (0.33..., 0.33..., 0.66..., 0.66...) >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') - (0.3333333333333333, 0.3333333333333333, 0.6666666666666666, - 0.6666666666666666) + (0.33..., 0.33..., 0.66..., 0.66...) It is possible to compute per-label FPR, FNR, TNR, TPR and supports instead of averaging: @@ -2794,11 +2790,11 @@ def tpr_fpr_tnr_fnr_scores( Notes ----- - When ``true positive + false negative == 0``, TPR, FNR are undefined; - When ``true negative + false positive == 0``, FPR, TNR are undefined. + When `true positive + false negative == 0`, TPR, FNR are undefined; + When `true negative + false positive == 0`, FPR, TNR are undefined. In such cases, by default the metric will be set to 0, as will F-score, - and ``UndefinedMetricWarning`` will be raised. 
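The macro values quoted in the Examples block above can be checked by hand. Counting per class over the six samples (classes sorted as cat, dog, pig): cat has tp=2, fp=1, fn=0, tn=3; dog has tp=0, fp=2, fn=2, tn=2; pig has tp=0, fp=1, fn=2, tn=3. The plain arithmetic below reproduces the 0.33.../0.66... figures in the doctest:

    # Macro average = unweighted mean of the per-class ratios.
    tpr = (2 / 2 + 0 / 2 + 0 / 2) / 3   # 0.333...
    fpr = (1 / 4 + 2 / 4 + 1 / 4) / 3   # 0.333...
    tnr = (3 / 4 + 2 / 4 + 3 / 4) / 3   # 0.666...
    fnr = (0 / 2 + 2 / 2 + 2 / 2) / 3   # 0.666...
    print(tpr, fpr, tnr, fnr)

    # Micro average pools the counts first and happens to give the same
    # values on this data: TPR = 2 / 6 = 0.333..., FPR = 4 / 12 = 0.333...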
This behavior can be - modified with ``zero_division``. + and `UndefinedMetricWarning` will be raised. This behavior can be + modified with `zero_division`. """ _check_zero_division(zero_division) @@ -2882,8 +2878,8 @@ def specificity_score( ): """Compute specificity, also known as true negative rate. - The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number - of true negatives and ``fp`` is the number of false positives. + The specificity is the ratio `tn / (tn + fp)` where `tn` is the number + of true negatives and `fp` is the number of false positives. The specificity is intuitively the ability of the classifier to find all the negative samples. @@ -2898,7 +2894,7 @@ def specificity_score( Estimated targets as returned by a classifier. labels : array-like, default=None - The set of labels to include when ``average != 'binary'``, and their + The set of labels to include when `average != "binary"`, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will @@ -2907,9 +2903,9 @@ def specificity_score( ``y_pred`` are used in sorted order. pos_label : str or int, default=1 - The class to report if ``average='binary'`` and the data is binary. + The class to report if `average="binary"` and the data is binary. If the data are multiclass or multilabel, this will be ignored; - setting ``labels=[pos_label]`` and ``average != 'binary'`` will report + setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'} \ @@ -2918,21 +2914,21 @@ def specificity_score( If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - ``'binary'``: - Only report results for the class specified by ``pos_label``. - This is applicable only if targets (``y_{true,pred}``) are binary. - ``'micro'``: + `"binary"`: + Only report results for the class specified by `pos_label`. + This is applicable only if targets (`y_{true,pred}`) are binary. + `"micro"`: Calculate metrics globally by counting the total true positives, false negatives and false positives. - ``'macro'``: + `"macro"`: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. - ``'weighted'``: + `"weighted"`: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. - ``'samples'``: + `"samples"`: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). @@ -2959,9 +2955,9 @@ def specificity_score( Notes ----- - When ``true negative + false positive == 0``, specificity returns 0 and - raises ``UndefinedMetricWarning``. This behavior can be modified with - ``zero_division``. + When `true negative + false positive == 0`, specificity returns 0 and + raises `UndefinedMetricWarning`. This behavior can be modified with + `zero_division`. 
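The zero_division convention described in the Notes above mirrors the one precision_score and recall_score already use. Since recall is the same ratio as TPR, the behavior can be observed with the released recall_score in recent scikit-learn versions where that function accepts a zero_division argument (the tiny arrays below are illustrative only):

    import warnings
    from sklearn.metrics import recall_score

    y_true = [0, 0, 0]          # no positive labels, so tp + fn == 0
    y_pred = [0, 1, 0]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # default "warn" returns 0 and warns
        print(recall_score(y_true, y_pred))               # 0.0
    print(recall_score(y_true, y_pred, zero_division=0))  # 0.0, no warning
    print(recall_score(y_true, y_pred, zero_division=1))  # 1.0, no warning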
References ---------- From 657eac71469f7043b9a389c3cd3001e084f13f7e Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 13 Sep 2021 19:18:12 +0200 Subject: [PATCH 054/127] Fix linting in _classification.py Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Linting troubleshooting Fix linting in _classification.py FIx linting in _classification.py Troubleshooting test of _classification.py linting Fix linting --- sklearn/metrics/_classification.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index aab7722335ee9..b9cf9e23c507d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -544,7 +544,8 @@ def multilabel_confusion_matrix( raise ValueError( "All labels must be in [0, n labels) for " "multilabel targets. " - "Got %d < 0" % np.min(labels) + "Got %d < 0" + % np.min(labels) ) if n_labels is not None: @@ -2290,8 +2291,7 @@ def log_loss( y_true : array-like or label indicator matrix Ground truth (correct) labels for n_samples samples. - y_pred : array-like of float, shape = (n_samples, n_classes) \ - or (n_samples,) + y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the @@ -2658,7 +2658,7 @@ def tpr_fpr_tnr_fnr_scores( labels=None, pos_label=1, average=None, - warn_for=("tpr", "fpr", "tnr", "fnr"), + warn_for=("TPR", "FPR", "TNR", "FNR"), sample_weight=None, zero_division="warn", ): @@ -2692,13 +2692,13 @@ def tpr_fpr_tnr_fnr_scores( Estimated targets as returned by a classifier. labels : list, default=None - The set of labels to include when ``average != 'binary'``, and their - order if ``average is None``. Labels present in the data can be + The set of labels to include when `average != "binary"`, and their + order if `average is None`. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, - labels are column indices. By default, all labels in ``y_true`` and - ``y_pred`` are used in sorted order. + labels are column indices. By default, all labels in `y_true` and + `y_pred` are used in sorted order. pos_label : str or int, default=1 The class to report if `average="binary"` and the data is binary. @@ -2895,12 +2895,12 @@ def specificity_score( labels : array-like, default=None The set of labels to include when `average != "binary"`, and their - order if ``average is None``. Labels present in the data can be + order if `average is None`. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. 
For multilabel targets, - labels are column indices. By default, all labels in ``y_true`` and - ``y_pred`` are used in sorted order. + labels are column indices. By default, all labels in `y_true` and + `y_pred` are used in sorted order. pos_label : str or int, default=1 The class to report if `average="binary"` and the data is binary. @@ -2911,7 +2911,7 @@ def specificity_score( average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'} \ default='binary' This parameter is required for multiclass/multilabel targets. - If ``None``, the scores for each class are returned. Otherwise, this + If `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: `"binary"`: @@ -2989,7 +2989,7 @@ def specificity_score( labels=labels, pos_label=pos_label, average=average, - warn_for=("tnr",), + warn_for=("TNR",), sample_weight=sample_weight, zero_division=zero_division, ) From 0426087b9e376b29c6037da5445032191d284717 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 10:55:00 +0200 Subject: [PATCH 055/127] Doc fixes in _classification.py --- sklearn/metrics/_classification.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b9cf9e23c507d..d39a69ece411f 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2664,17 +2664,17 @@ def tpr_fpr_tnr_fnr_scores( ): """Compute TPR, FPR, TNR, FNR for each class. - The TPR is the ratio `tp / (tp + fn)` where `tp` is the number of + The TPR is the ratio `TP / (TP + FN)` where `tp` is the number of true positives and `fn` the number of false negatives. - The FPR is the ratio `fp / (tn + fp)` where `tn` is the number of - true negatives and `fp` the number of false positives. + The FPR is the ratio `FP / (TN + FP)` where `tn` is the number of + true negatives and `FP` the number of false positives. - The TNR is the ratio `tn / (tn + fp)` where `tn` is the number of - true negatives and `fp` the number of false positives. + The TNR is the ratio `TN / (TN + FP)` where `tn` is the number of + true negatives and `FP` the number of false positives. - The FNR is the ratio `fn / (tp + fn)` where `tp` is the number of - true positives and `fn` the number of false negatives. + The FNR is the ratio `FN / (TP + FN)` where `tp` is the number of + true positives and `FN` the number of false negatives. If `pos_label is None` and in binary classification, this function returns the true positive rate, false positive rate, true negative rate @@ -2738,8 +2738,8 @@ def tpr_fpr_tnr_fnr_scores( zero_division : str or int, {"warn", 0, 1}, default="warn" Sets the value to return when there is a zero division: - - tpr, fnr: when there are no positive labels - - fpr, tnr: when there are no negative labels + - TPR, FNR: when there are no positive labels + - FPR, TNR: when there are no negative labels If set to "warn", this acts as 0, but warnings are also raised. @@ -2878,8 +2878,8 @@ def specificity_score( ): """Compute specificity, also known as true negative rate. - The specificity is the ratio `tn / (tn + fp)` where `tn` is the number - of true negatives and `fp` is the number of false positives. + The specificity is the ratio `TN / (TN + FP)` where `TN` is the number + of true negatives and `FP` is the number of false positives. The specificity is intuitively the ability of the classifier to find all the negative samples. 
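A concrete way to sanity-check the definition just restated: for binary targets, specificity is simply the recall of the negative class, so the value can be cross-checked with released scikit-learn APIs (the small arrays below are illustrative only):

    from sklearn.metrics import confusion_matrix, recall_score

    y_true = [0, 1, 0, 0, 1, 1, 0]
    y_pred = [0, 1, 1, 0, 0, 1, 0]

    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    print(tn / (tn + fp))                             # 0.75, TN / (TN + FP)
    print(recall_score(y_true, y_pred, pos_label=0))  # 0.75, recall of class 0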
@@ -2908,8 +2908,8 @@ def specificity_score( setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. - average : str, {None, 'binary', 'micro', 'macro', 'samples', 'weighted'} \ - default='binary' + average : str, {None, "binary", "micro", "macro", "samples", "weighted"} \ + default="binary" This parameter is required for multiclass/multilabel targets. If `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: From 636f87c692ba4fac2761cde7967eae5f28d8f4a9 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 11:47:18 +0200 Subject: [PATCH 056/127] Revert change to v1.0.rst --- doc/whats_new/v1.0.rst | 366 +++++++++++++++++++++++------------------ 1 file changed, 205 insertions(+), 161 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 8dcc12268c0ec..d8083fe6c6a9e 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -30,8 +30,6 @@ parameter is used as positional, a `TypeError` is now raised. `_ for more details. -Put the changes in their relevant module. - Changed models -------------- @@ -135,13 +133,14 @@ Changelog 1.2. :pr:`20165` by `Thomas Fan`_. - |API| :term:`get_feature_names_out` has been added to the transformer API - to get the names of the output features. :pr:`18444` by `Thomas Fan`_. + to get the names of the output features. :term:`get_feature_names` has in + turn been deprecated. :pr:`18444` by `Thomas Fan`_. - |API| All estimators store `feature_names_in_` when fitted on pandas Dataframes. - These feature names are compared to names seen in `non-fit` methods, - `i.e.` `transform` and will raise a `FutureWarning` if they are not consistent. - These `FutureWarning`s will become `ValueError`s in 1.2. - :pr:`18010` by `Thomas Fan`_. + These feature names are compared to names seen in non-`fit` methods, e.g. + `transform` and will raise a `FutureWarning` if they are not consistent. + These ``FutureWarning`` s will become ``ValueError`` s in 1.2. :pr:`18010` by + `Thomas Fan`_. :mod:`sklearn.base` ................... @@ -154,7 +153,7 @@ Changelog - |Feature| :func:`calibration.CalibrationDisplay` added to plot calibration curves. :pr:`17443` by :user:`Lucy Liu `. -- |Fix| The predict and predict_proba methods of +- |Fix| The ``predict`` and ``predict_proba`` methods of :class:`calibration.CalibratedClassifierCV` can now properly be used on prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre `. @@ -166,10 +165,10 @@ Changelog :mod:`sklearn.cluster` ...................... -- |Efficiency| The "k-means++" initialization of :class:`cluster.KMeans` and - :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore - settings. :pr:`19002` by :user:`Jon Crall ` and - :user:`Jérémie du Boisberranger `. +- |Efficiency| The ``"k-means++"`` initialization of :class:`cluster.KMeans` + and :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore + settings. :pr:`19002` by :user:`Jon Crall ` and :user:`Jérémie du + Boisberranger `. - |Efficiency| :class:`cluster.KMeans` with `algorithm='elkan'` is now faster in multicore settings. :pr:`19052` by @@ -216,11 +215,11 @@ Changelog deprecated and will be removed in 1.2. :pr:`19297` by `Thomas Fan`_. - |API| the default value for the `batch_size` parameter of - :class:`MiniBatchKMeans` was changed from 100 to 1024 due to efficiency - reasons. 
The `n_iter_` attribute of :class:`MiniBatchKMeans` now reports the - number of started epochs and the `n_steps_` attribute reports the number of - mini batches processed. :pr:`17622` - by :user:`Jérémie du Boisberranger `. + :class:`cluster.MiniBatchKMeans` was changed from 100 to 1024 due to + efficiency reasons. The `n_iter_` attribute of + :class:`cluster.MiniBatchKMeans` now reports the number of started epochs and + the `n_steps_` attribute reports the number of mini batches processed. + :pr:`17622` by :user:`Jérémie du Boisberranger `. - |API| :func:`cluster.spectral_clustering` raises an improved error when passed a `np.matrix`. :pr:`20560` by `Thomas Fan`_. @@ -228,10 +227,6 @@ Changelog :mod:`sklearn.compose` ...................... -- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`. - This flag controls the prefixing of feature names out in - :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_. - - |Enhancement| :class:`compose.ColumnTransformer` now records the output of each transformer in `output_indices_`. :pr:`18393` by :user:`Luca Bittarello `. @@ -247,20 +242,25 @@ Changelog :pr:`19244` by :user:`Ricardo `. - |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports - non-string feature names returned by any of its transformers. - :pr:`18459` by :user:`Albert Villanova del Moral ` and - :user:`Alonso Silva Allende `. + non-string feature names returned by any of its transformers. However, note + that ``get_feature_names`` is deprecated, use ``get_feature_names_out`` + instead. :pr:`18459` by :user:`Albert Villanova del Moral ` + and :user:`Alonso Silva Allende `. - |Fix| :class:`compose.TransformedTargetRegressor` now takes nD targets with an adequate transformer. :pr:`18898` by :user:`Oras Phongpanagnam `. +- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`. + This flag controls the prefixing of feature names out in + :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_. + :mod:`sklearn.covariance` ......................... - |Fix| Adds arrays check to :func:`covariance.ledoit_wolf` and - :func:`covariance.ledoit_wolf_shrinkage`. - :pr:`20416` by :user:`Hugo Defois `. + :func:`covariance.ledoit_wolf_shrinkage`. :pr:`20416` by :user:`Hugo Defois + `. - |API| Deprecates the following keys in `cv_results_`: `'mean_score'`, `'std_score'`, and `'split(k)_score'` in favor of `'mean_test_score'` @@ -308,28 +308,31 @@ Changelog - |Fix| Fixes incorrect multiple data-conversion warnings when clustering boolean data. :pr:`19046` by :user:`Surya Prakash `. -- |Fix| Fixed :func:`dict_learning`, used by :class:`DictionaryLearning`, to - ensure determinism of the output. Achieved by flipping signs of the SVD - output which is used to initialize the code. - :pr:`18433` by :user:`Bruno Charron `. +- |Fix| Fixed :func:`dict_learning`, used by + :class:`decomposition.DictionaryLearning`, to ensure determinism of the + output. Achieved by flipping signs of the SVD output which is used to + initialize the code. :pr:`18433` by :user:`Bruno Charron `. -- |Fix| Fixed a bug in :class:`MiniBatchDictionaryLearning`, - :class:`MiniBatchSparsePCA` and :func:`dict_learning_online` where the - update of the dictionary was incorrect. :pr:`19198` by - :user:`Jérémie du Boisberranger `. +- |Fix| Fixed a bug in :class:`decomposition.MiniBatchDictionaryLearning`, + :class:`decomposition.MiniBatchSparsePCA` and + :func:`decomposition.dict_learning_online` where the update of the dictionary + was incorrect. 
:pr:`19198` by :user:`Jérémie du Boisberranger `. -- |Fix| Fixed a bug in :class:`DictionaryLearning`, :class:`SparsePCA`, - :class:`MiniBatchDictionaryLearning`, :class:`MiniBatchSparsePCA`, - :func:`dict_learning` and :func:`dict_learning_online` where the restart of - unused atoms during the dictionary update was not working as expected. - :pr:`19198` by :user:`Jérémie du Boisberranger `. +- |Fix| Fixed a bug in :class:`decomposition.DictionaryLearning`, + :class:`decomposition.SparsePCA`, + :class:`decomposition.MiniBatchDictionaryLearning`, + :class:`decomposition.MiniBatchSparsePCA`, + :func:`decomposition.dict_learning` and + :func:`decomposition.dict_learning_online` where the restart of unused atoms + during the dictionary update was not working as expected. :pr:`19198` by + :user:`Jérémie du Boisberranger `. - |API| In :class:`decomposition.DictionaryLearning`, :class:`decomposition.MiniBatchDictionaryLearning`, - :func:`dict_learning` and :func:`dict_learning_online`, - `transform_alpha` will be equal to `alpha` instead of 1.0 by default - starting from version 1.2 - :pr:`19159` by :user:`Benoît Malézieux `. + :func:`decomposition.dict_learning` and + :func:`decomposition.dict_learning_online`, `transform_alpha` will be equal + to `alpha` instead of 1.0 by default starting from version 1.2 :pr:`19159` by + :user:`Benoît Malézieux `. - |API| Rename variable names in :class:`KernelPCA` to improve readability. `lambdas_` and `alphas_` are renamed to `eigenvalues_` @@ -368,9 +371,9 @@ Changelog :class:`ensemble.StackingClassifier` and :class:`ensemble.StackingRegressor`. :pr:`19564` by `Thomas Fan`_. -- |Enhancement| Documented and tested support of the Poisson criterion for - :class:`ensemble.RandomForestRegressor`. :pr:`19836` by - :user:`Brian Sun `. +- |Enhancement| Added Poisson criterion to + :class:`ensemble.RandomForestRegressor`. :pr:`19836` by :user:`Brian Sun + `. - |Fix| Do not allow to compute out-of-bag (OOB) score in :class:`ensemble.RandomForestClassifier` and @@ -384,7 +387,7 @@ Changelog to avoid underflows. :pr:`10096` by :user:`Fenil Suchak `. -- |Fix| Fixed the range of the argument max_samples to be (0.0, 1.0] +- |Fix| Fixed the range of the argument ``max_samples`` to be ``(0.0, 1.0]`` in :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, where `max_samples=1.0` is interpreted as using all `n_samples` for bootstrapping. :pr:`20159` by @@ -435,10 +438,10 @@ Changelog :user:`Gil Rutter `, and :user:`Adrin Jalali `. - |FIX| Fix a bug in :func:`isotonic.isotonic_regression` where the - `sample_weight` passed by a user were overwritten during the fit. + `sample_weight` passed by a user were overwritten during ``fit``. :pr:`20515` by :user:`Carsten Allefeld `. -- |Fix| Change :func:`feature_selection.SequentialFeatureSelection` to +- |Fix| Change :func:`feature_selection.SequentialFeatureSelector` to allow for unsupervised modelling so that the `fit` signature need not do any `y` validation and allow for `y=None`. :pr:`19568` by :user:`Shyam Desai `. @@ -456,20 +459,19 @@ Changelog ......................... - |Enhancement| Add `max_samples` parameter in - :func:`inspection._permutation_importance`. It enables to draw a subset of - the samples to compute the permutation importance. This is useful to - keep the method tractable when evaluating feature importance on - large datasets. + :func:`inspection.permutation_importance`. It enables to draw a subset of the + samples to compute the permutation importance. 
This is useful to keep the + method tractable when evaluating feature importance on large datasets. :pr:`20431` by :user:`Oliver Pfaffel `. - |Enhancement| Add kwargs to format ICE and PD lines separately in partial - dependence plots :func:`~sklearn.inspection.plot_partial_dependence` and - :meth:`~sklearn.inspection.PartialDependenceDisplay.plot`. - :pr:`19428` by :user:`Mehdi Hamoumi `. + dependence plots :func:`inspection.plot_partial_dependence` and + :meth:`inspection.PartialDependenceDisplay.plot`. :pr:`19428` by :user:`Mehdi + Hamoumi `. - |Fix| Allow multiple scorers input to - :func:`~sklearn.inspection.permutation_importance`. - :pr:`19411` by :user:`Simona Maggio `. + :func:`inspection.permutation_importance`. :pr:`19411` by :user:`Simona + Maggio `. - |API| :class:`inspection.PartialDependenceDisplay` exposes a class method: :func:`~inspection.PartialDependenceDisplay.from_estimator`. @@ -479,10 +481,10 @@ Changelog :mod:`sklearn.kernel_approximation` ................................... -- |Fix| Fix a bug in :class:`sklearn.kernel_approximation.Nystroem` - where the attribute `component_indices_` did not correspond - to the subset of samples indices used to generate the approximated kernel. - :pr:`20554` by :user:`Xiangyin Kong `. +- |Fix| Fix a bug in :class:`kernel_approximation.Nystroem` + where the attribute `component_indices_` did not correspond to the subset of + sample indices used to generate the approximated kernel. :pr:`20554` by + :user:`Xiangyin Kong `. :mod:`sklearn.linear_model` ........................... @@ -504,33 +506,30 @@ Changelog :pr:`16449` by :user:`Christian Lorentzen `. - |Feature| Added new solver `lbfgs` (available with `solver="lbfgs"`) - and `positive` argument to class:`linear_model.Ridge`. - When `positive` is set to True, forces the coefficients to be positive - (only supported by `lbfgs`). - :pr:`20231` by :user:`Toshihiro Nakae `. + and `positive` argument to :class:`linear_model.Ridge`. When `positive` is + set to `True`, forces the coefficients to be positive (only supported by + `lbfgs`). :pr:`20231` by :user:`Toshihiro Nakae `. - |Efficiency| The implementation of :class:`linear_model.LogisticRegression` has been optimised for dense matrices when using `solver='newton-cg'` and `multi_class!='multinomial'`. :pr:`19571` by :user:`Julien Jerphanion `. -- |Efficiency| The implementation of `fit` for `PolynomialFeatures` transformer - is now faster. This is especially noticeable on large sparse input. - :pr:`19734` by :user:`Fred Robinson `. - - |Enhancement| `fit` method preserves dtype for numpy.float32 in - :class:`Lars`, :class:`LassoLars`, :class:`LassoLars`, :class:`LarsCV` and - :class:`LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura `. + :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, + :class:`linear_model.LassoLars`, :class:`linear_model.LarsCV` and + :class:`linear_model.LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura + `. - |Enhancement| Validate user-supplied gram matrix passed to linear models via the `precompute` argument. :pr:`19004` by :user:`Adam Midvidy `. -- |Fix| :meth:`ElasticNet.fit` no longer modifies `sample_weight` in place. - :pr:`19055` by `Thomas Fan`_. +- |Fix| :meth:`linear_model.ElasticNet.fit` no longer modifies `sample_weight` + in place. :pr:`19055` by `Thomas Fan`_. -- |Fix| :class:`Lasso`, :class:`ElasticNet` no longer have a `dual_gap_` - not corresponding to their objective. 
:pr:`19172` by - :user:`Mathurin Massias ` +- |Fix| :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet` no + longer have a `dual_gap_` not corresponding to their objective. :pr:`19172` + by :user:`Mathurin Massias ` - |Fix| `sample_weight` are now fully taken into account in linear models when `normalize=True` for both feature centering and feature @@ -543,13 +542,9 @@ Changelog a model perfectly on some datasets when `residual_threshold=0`. :pr:`19499` by :user:`Gregory Strubel `. -- |Fix| Sample weight invariance for class:`Ridge` was fixed in :pr:`19616` by - :user:`Oliver Grisel ` and - :user:`Christian Lorentzen `. - -- |Fix| The :func:`preprocessing.StandardScaler.inverse_transform` method - now raises error when the input data is 1D. - :pr:`19752` by :user:`Zhehao Liu `. +- |Fix| Sample weight invariance for :class:`linear_model.Ridge` was fixed in + :pr:`19616` by :user:`Oliver Grisel ` and :user:`Christian Lorentzen + `. - |Fix| The dictionary `params` in :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` should only contain parameter of the @@ -557,60 +552,54 @@ Changelog :pr:`19391` by :user:`Shao Yang Hong `. - |API| Raise a warning in :class:`linear_model.RANSACRegressor` that from - version 1.2, `min_samples` need to be set explicitly for model other than - :class:`linear_model.LinearRegression`. - :pr:`19390` by :user:`Shao Yang Hong `. + version 1.2, `min_samples` need to be set explicitly for models other than + :class:`linear_model.LinearRegression`. :pr:`19390` by :user:`Shao Yang Hong + `. - |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` - is deprecated and will be removed in 1.2. - Motivation for this deprecation: ``normalize`` parameter did not take any - effect if ``fit_intercept`` was set to False and therefore was deemed - confusing. - The behavior of the deprecated LinearModel(normalize=True) can be - reproduced with :class:`~sklearn.pipeline.Pipeline` with - :class:`~sklearn.preprocessing.LinearModel` (where LinearModel is - LinearRegression, Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV) as - follows: - make_pipeline(StandardScaler(with_mean=False), LinearModel()). - The ``normalize`` parameter in :class:`linear_model.LinearRegression` was - deprecated in :pr:`17743` by + is deprecated and will be removed in 1.2. Motivation for this deprecation: + ``normalize`` parameter did not take any effect if ``fit_intercept`` was set + to False and therefore was deemed confusing. The behavior of the deprecated + ``LinearModel(normalize=True)`` can be reproduced with a + :class:`~sklearn.pipeline.Pipeline` with ``LinearModel`` (where + ``LinearModel`` is :class:`~linear_model.LinearRegression`, + :class:`~linear_model.Ridge`, :class:`~linear_model.RidgeClassifier`, + :class:`~linear_model.RidgeCV` or :class:`~linear_model.RidgeClassifierCV`) + as follows: ``make_pipeline(StandardScaler(with_mean=False), + LinearModel())``. The ``normalize`` parameter in + :class:`~linear_model.LinearRegression` was deprecated in :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Alexandre Gramfort `. - Same for :class:`linear_model.Ridge`, :class:`linear_model.RidgeClassifier`, - :class:`linear_model.RidgeCV`, and :class:`linear_model.RidgeClassifierCV`, - in: :pr:`17772` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. - Same for :class:`linear_model.BayesianRidge`, - :class:`linear_model.ARDRegression` in: - :pr:`17746` by :user:`Maria Telenczuk `. 
- Same for :class:`linear_model.Lasso`, :class:`linear_model.LassoCV`, - :class:`linear_model.ElasticNet`, :class:`linear_model.ElasticNetCV`, - :class:`linear_model.MultiTaskLasso`, :class:`linear_model.MultiTaskLassoCV`, - :class:`linear_model.MultiTaskElasticNet`, - :class:`linear_model.MultiTaskElasticNetCV`, in: - :pr:`17785` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. - -- The ``normalize`` parameter of :class:`linear_model.OrthogonalMatchingPursuit` - :class:`linear_model.OrthogonalMatchingPursuitCV` will default to - False in 1.2 and will be removed in 1.4. - :pr:`17750` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. - Same for :class:`linear_model.Lars` - :class:`linear_model.LarsCV` :class:`linear_model.LassoLars` - :class:`linear_model.LassoLarsCV` :class:`linear_model.LassoLarsIC`, - in :pr:`17769` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. + Same for :class:`~linear_model.Ridge`, + :class:`~linear_model.RidgeClassifier`, :class:`~linear_model.RidgeCV`, and + :class:`~linear_model.RidgeClassifierCV`, in: :pr:`17772` by :user:`Maria + Telenczuk ` and :user:`Alexandre Gramfort `. Same for + :class:`~linear_model.BayesianRidge`, :class:`~linear_model.ARDRegression` + in: :pr:`17746` by :user:`Maria Telenczuk `. Same for + :class:`~linear_model.Lasso`, :class:`~linear_model.LassoCV`, + :class:`~linear_model.ElasticNet`, :class:`~linear_model.ElasticNetCV`, + :class:`~linear_model.MultiTaskLasso`, + :class:`~linear_model.MultiTaskLassoCV`, + :class:`~linear_model.MultiTaskElasticNet`, + :class:`~linear_model.MultiTaskElasticNetCV`, in: :pr:`17785` by :user:`Maria + Telenczuk ` and :user:`Alexandre Gramfort `. + +- |API| The ``normalize`` parameter of + :class:`~linear_model.OrthogonalMatchingPursuit` and + :class:`~linear_model.OrthogonalMatchingPursuitCV` will default to False in + 1.2 and will be removed in 1.4. :pr:`17750` by :user:`Maria Telenczuk + ` and :user:`Alexandre Gramfort `. Same for + :class:`~linear_model.Lars` :class:`~linear_model.LarsCV` + :class:`~linear_model.LassoLars` :class:`~linear_model.LassoLarsCV` + :class:`~linear_model.LassoLarsIC`, in :pr:`17769` by :user:`Maria Telenczuk + ` and :user:`Alexandre Gramfort `. - |API| Keyword validation has moved from `__init__` and `set_params` to `fit` for the following estimators conforming to scikit-learn's conventions: - :class:`linear_model.SGDClassifier`, - :class:`linear_model.SparseSGDClassifier`, - :class:`linear_model.SGDRegressor`, - :class:`linear_model.SparseSGDRegressor`, - :class:`linear_model.SGDOneClassSVM`, - :class:`linear_model.SparseSGDOneClassSVM`, - :class:`linear_model.PassiveAggressiveClassifier`, - :class:`linear_model.PassiveAggressiveRegressor`. + :class:`~linear_model.SGDClassifier`, + :class:`~linear_model.SGDRegressor`, + :class:`~linear_model.SGDOneClassSVM`, + :class:`~linear_model.PassiveAggressiveClassifier`, and + :class:`~linear_model.PassiveAggressiveRegressor`. :pr:`20683` by `Guillaume Lemaitre`_. :mod:`sklearn.manifold` @@ -638,11 +627,6 @@ Changelog - |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for quantile regression. :pr:`19415` by :user:`Xavier Dupré ` and :user:`Oliver Grisel `. - -- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores` - and :func:`metrics.specificity_score`. - :pr:`19556` by :user:`Hao Chun Chang ` - and :user:`Pawel Kranzberg `. - |Feature| :func:`metrics.d2_tweedie_score` calculates the D^2 regression score for Tweedie deviances with power parameter ``power``. 
This is a @@ -672,9 +656,9 @@ Changelog :pr:`18328` by :user:`Albert Villanova del Moral ` and :user:`Alonso Silva Allende `. -- |Fix| avoid overflow in :func:`metrics.cluster.adjust_rand_score` with large - amount of data. - :pr:`20312` by :user:`Divyanshu Deoli `. +- |Fix| avoid overflow in :func:`metrics.cluster.adjusted_rand_score` with + large amount of data. :pr:`20312` by :user:`Divyanshu Deoli + `. - |API| :class:`metrics.ConfusionMatrixDisplay` exposes two class methods :func:`~metrics.ConfusionMatrixDisplay.from_estimator` and @@ -758,7 +742,7 @@ Changelog - |FIX| :class:`neighbors.NearestNeighbors`, :class:`neighbors.KNeighborsClassifier`, :class:`neighbors.RadiusNeighborsClassifier`, :class:`neighbors.KNeighborsRegressor` - and :class:`neighbors.RadiusNeighborsRegressor` does not validate `weights` in + and :class:`neighbors.RadiusNeighborsRegressor` do not validate `weights` in `__init__` and validates `weights` in `fit` instead. :pr:`20072` by :user:`Juan Carlos Alfaro Jiménez `. @@ -777,9 +761,8 @@ Changelog ....................... - |API| The `predict_proba` and `predict_log_proba` methods of the - :class:`Pipeline` class now support passing prediction kwargs to - the final estimator. - :pr:`19790` by :user:`Christopher Flynn `. + :class:`pipeline.Pipeline` now support passing prediction kwargs to the final + estimator. :pr:`19790` by :user:`Christopher Flynn `. :mod:`sklearn.preprocessing` ............................ @@ -807,13 +790,22 @@ Changelog a tuple to `degree`, i.e. `degree=(min_degree, max_degree)`. :pr:`20250` by :user:`Christian Lorentzen `. -- |Efficiency| `preprocessing.StandardScaler` is faster and more memory +- |Efficiency| :class:`preprocessing.StandardScaler` is faster and more memory efficient. :pr:`20652` by `Thomas Fan`_. - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. :pr:`19934` by :user:`Gleb Levitskiy `. +- |Efficiency| The implementation of `fit` for + :class:`preprocessing.PolynomialFeatures` transformer is now faster. This is + especially noticeable on large sparse input. :pr:`19734` by :user:`Fred + Robinson `. + +- |Fix| The :func:`preprocessing.StandardScaler.inverse_transform` method + now raises error when the input data is 1D. :pr:`19752` by :user:`Zhehao Liu + `. + - |Fix| :func:`preprocessing.scale`, :class:`preprocessing.StandardScaler` and similar scalers detect near-constant features to avoid scaling them to very large values. This problem happens in particular when using a scaler on @@ -826,7 +818,7 @@ Changelog correctly handles integer dtypes. :pr:`19356` by :user:`makoeppel`. - |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` is not - supporting sparse matrix and raise the appropriate error message. + supporting sparse matrix and raises the appropriate error message. :pr:`19879` by :user:`Guillaume Lemaitre `. - |Fix| The `fit` method of :class:`preprocessing.OrdinalEncoder` will not @@ -839,14 +831,14 @@ Changelog (`np.float64` or `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. +- |Fix| :class:`preprocessing.FunctionTransformer` does not set `n_features_in_` + based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_. + - |API| The `n_input_features_` attribute of :class:`preprocessing.PolynomialFeatures` is deprecated in favor of `n_features_in_` and will be removed in 1.2. :pr:`20240` by :user:`Jérémie du Boisberranger `. 
-- |Fix| :class:`preprocessing.FunctionTransformer` does not set `n_features_in_` - based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_. - :mod:`sklearn.svm` ................... @@ -890,8 +882,8 @@ Changelog :pr:`19948` by `Joel Nothman`_. - |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__sklearn_is_fitted__`` if available, instead of checking for attributes ending with - an underscore. This also makes :class:`Pipeline` and + ``__sklearn_is_fitted__`` if available, instead of checking for attributes + ending with an underscore. This also makes :class:`pipeline.Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. @@ -899,8 +891,9 @@ Changelog precision of the computed variance was very poor when the real variance is exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger `. -- |Fix| Propreties that are decorated with :func:`utils.deprecated` correctly - wraps the property's docstring. :pr:`20385` by `Thomas Fan`_. +- |Fix| The docstrings of propreties that are decorated with + :func:`utils.deprecated` are now properly wrapped. :pr:`20385` by `Thomas + Fan`_. - |Fix| :func:`utils.stats._weighted_percentile` now correctly ignores zero-weighted observations smaller than the smallest observation with @@ -923,6 +916,10 @@ Changelog `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. +- |Fix| Support for `np.matrix` is deprecated in + :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in + 1.2. :pr:`20165` by `Thomas Fan`_. + - |API| :func:`utils._testing.assert_warns` and :func:`utils._testing.assert_warns_message` are deprecated in 1.0 and will be removed in 1.2. Used `pytest.warns` context manager instead. Note that @@ -933,17 +930,64 @@ Changelog now deprecated. Use `scipy.sparse.csgraph.shortest_path` instead. :pr:`20531` by `Tom Dupre la Tour`_. -:mod:`sklearn.validation` -......................... - -- |Fix| Support for `np.matrix` is deprecated in - :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in - 1.2. :pr:`20165` by `Thomas Fan`_. - Code and Documentation Contributors ----------------------------------- Thanks to everyone who has contributed to the maintenance and improvement of the project since version 0.24, including: -TODO: update at the time of the release. +Abdulelah S. Al Mesfer, Abhinav Gupta, Adam J. 
Stewart, Adam Li, Adam Midvidy, +Adrian Garcia Badaracco, Adrian Sadłocha, Adrin Jalali, Agamemnon Krasoulis, +Alberto Rubiales, Albert Thomas, Albert Villanova del Moral, Alek Lefebvre, +Alessia Marcolini, Alexandr Fonari, Alihan Zihna, Aline Ribeiro de Almeida, +Amanda, Amanda Dsouza, Amol Deshmukh, Ana Pessoa, Anavelyz, Andreas Mueller, +Andrew Delong, Ashish, Ashvith Shetty, Atsushi Nukariya, Avi Gupta, Ayush +Singh, baam, BaptBillard, Benjamin Pedigo, Bertrand Thirion, Bharat +Raghunathan, bmalezieux, Brian Rice, Brian Sun, Bruno Charron, Bryan Chen, +bumblebee, caherrera-meli, Carsten Allefeld, CeeThinwa, Chiara Marmo, +chrissobel, Christian Lorentzen, Christopher Yeh, Chuliang Xiao, Clément +Fauchereau, cliffordEmmanuel, Conner Shen, Connor Tann, David Dale, David Katz, +David Poznik, Divyanshu Deoli, dmallia17, Dmitry Kobak, DS_anas, Eduardo +Jardim, EdwinWenink, EL-ATEIF Sara, Eleni Markou, Eric Fiegel, Erich Schubert, +Ezri-Mudde, Fatos Morina, Felipe Rodrigues, Felix Hafner, Fenil Suchak, +flyingdutchman23, Flynn, Fortune Uwha, Francois Berenger, Frankie Robertson, +Frans Larsson, Frederick Robinson, Gabriel S Vicente, Gael Varoquaux, genvalen, +Geoffrey Thomas, geroldcsendes, Gleb Levitskiy, Glen, Glòria Macià Muñoz, +gregorystrubel, groceryheist, Guillaume Lemaitre, guiweber, Haidar Almubarak, +Hans Moritz Günther, Haoyin Xu, Harris Mirza, Harry Wei, Harutaka Kawamura, +Hassan Alsawadi, Helder Geovane Gomes de Lima, Hugo DEFOIS, Igor Ilic, Ikko +Ashimine, Isaack Mungui, Ishaan Bhat, Ishan Mishra, Iván Pulido, iwhalvic, +Jack Liu, James Alan Preiss, James Budarz, James Lamb, Jannik, Jeff Zhao, +Jennifer Maldonado, Jérémie du Boisberranger, Jesse Lima, Jianzhu Guo, +jnboehm, Joel Nothman, JohanWork, John Paton, Jonathan Schneider, Jon Crall, +Jon Haitz Legarreta Gorroño, Joris Van den Bossche, José Manuel Nápoles +Duarte, Juan Carlos Alfaro Jiménez, Juan Martin Loyola, Julien Jerphanion, +Julio Batista Silva, julyrashchenko, JVM, Kadatatlu Kishore, Karen Palacio, Kei +Ishikawa, kmatt10, kobaski, Kot271828, Kunj, KurumeYuta, kxytim, lacrosse91, +LalliAcqua, Laveen Bagai, Leonardo Rocco, Leonardo Uieda, Leopoldo Corona, Loic +Esteve, LSturtew, Luca Bittarello, Luccas Quadros, Lucy Jiménez, Lucy Liu, +ly648499246, Mabu Manaileng, makoeppel, Marco Gorelli, Maren Westermann, +Mariangela, Maria Telenczuk, marielaraj, Martin Hirzel, Mateo Noreña, Mathieu +Blondel, Mathis Batoul, mathurinm, Matthew Calcote, Maxime Prieur, Maxwell, +Mehdi Hamoumi, Mehmet Ali Özer, Miao Cai, Michal Karbownik, michalkrawczyk, +Mitzi, mlondschien, Mohamed Haseeb, Mohamed Khoualed, Muhammad Jarir Kanji, +murata-yu, Nadim Kawwa, Nanshan Li, naozin555, Nate Parsons, Neal Fultz, Nic +Annau, Nicolas Hug, Nicolas Miller, Nico Stefani, Nigel Bosch, Nodar +Okroshiashvili, Norbert Preining, novaya, Ogbonna Chibuike Stephen, OGordon100, +Oliver Pfaffel, Olivier Grisel, Oras Phongpanangam, Pablo Duque, Pablo +Ibieta-Jimenez, Patric Lacouth, Paulo S. 
Costa, Paweł Olszewski, Peter Dye, +PierreAttard, Pierre-Yves Le Borgne, PranayAnchuri, Prince Canuma, putschblos, +qdeffense, RamyaNP, ranjanikrishnan, Ray Bell, Rene Jean Corneille, Reshama +Shaikh, ricardojnf, RichardScottOZ, Rodion Martynov, Rohan Paul, Roman Lutz, +Roman Yurchak, Samuel Brice, Sandy Khosasi, Sean Benhur J, Sebastian Flores, +Sebastian Pölsterl, Shao Yang Hong, shinehide, shinnar, shivamgargsya, +Shooter23, Shuhei Kayawari, Shyam Desai, simonamaggio, Sina Tootoonian, +solosilence, Steven Kolawole, Steve Stagg, Surya Prakash, swpease, Sylvain +Marié, Takeshi Oura, Terence Honles, TFiFiE, Thomas A Caswell, Thomas J. Fan, +Tim Gates, TimotheeMathieu, Timothy Wolodzko, Tim Vink, t-jakubek, t-kusanagi, +tliu68, Tobias Uhmann, tom1092, Tomás Moreyra, Tomás Ronald Hughes, Tom +Dupré la Tour, Tommaso Di Noto, Tomohiro Endo, Toshihiro NAKAE, tsuga, Uttam +kumar, vadim-ushtanit, Vangelis Gkiastas, Venkatachalam N, Vilém Zouhar, +Vinicius Rios Fuck, Vlasovets, waijean, Whidou, xavier dupré, xiaoyuchai, +Yasmeen Alsaedy, yoch, Yosuke KOBAYASHI, Yu Feng, YusukeNagasaka, yzhenman, +Zero, ZeyuSun, ZhaoweiWang, Zito, Zito Relova From 356659cef512adb95a8aafd5730607eb4a4a7d7c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 11:55:56 +0200 Subject: [PATCH 057/127] Revert "Revert change to v1.0.rst" This reverts commit 17ad4fa7358b937cfbce3827210bba3afc7afd97. --- doc/whats_new/v1.0.rst | 366 ++++++++++++++++++----------------------- 1 file changed, 161 insertions(+), 205 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index d8083fe6c6a9e..8dcc12268c0ec 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -30,6 +30,8 @@ parameter is used as positional, a `TypeError` is now raised. `_ for more details. +Put the changes in their relevant module. + Changed models -------------- @@ -133,14 +135,13 @@ Changelog 1.2. :pr:`20165` by `Thomas Fan`_. - |API| :term:`get_feature_names_out` has been added to the transformer API - to get the names of the output features. :term:`get_feature_names` has in - turn been deprecated. :pr:`18444` by `Thomas Fan`_. + to get the names of the output features. :pr:`18444` by `Thomas Fan`_. - |API| All estimators store `feature_names_in_` when fitted on pandas Dataframes. - These feature names are compared to names seen in non-`fit` methods, e.g. - `transform` and will raise a `FutureWarning` if they are not consistent. - These ``FutureWarning`` s will become ``ValueError`` s in 1.2. :pr:`18010` by - `Thomas Fan`_. + These feature names are compared to names seen in `non-fit` methods, + `i.e.` `transform` and will raise a `FutureWarning` if they are not consistent. + These `FutureWarning`s will become `ValueError`s in 1.2. + :pr:`18010` by `Thomas Fan`_. :mod:`sklearn.base` ................... @@ -153,7 +154,7 @@ Changelog - |Feature| :func:`calibration.CalibrationDisplay` added to plot calibration curves. :pr:`17443` by :user:`Lucy Liu `. -- |Fix| The ``predict`` and ``predict_proba`` methods of +- |Fix| The predict and predict_proba methods of :class:`calibration.CalibratedClassifierCV` can now properly be used on prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre `. @@ -165,10 +166,10 @@ Changelog :mod:`sklearn.cluster` ...................... -- |Efficiency| The ``"k-means++"`` initialization of :class:`cluster.KMeans` - and :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore - settings. :pr:`19002` by :user:`Jon Crall ` and :user:`Jérémie du - Boisberranger `. 
+- |Efficiency| The "k-means++" initialization of :class:`cluster.KMeans` and + :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore + settings. :pr:`19002` by :user:`Jon Crall ` and + :user:`Jérémie du Boisberranger `. - |Efficiency| :class:`cluster.KMeans` with `algorithm='elkan'` is now faster in multicore settings. :pr:`19052` by @@ -215,11 +216,11 @@ Changelog deprecated and will be removed in 1.2. :pr:`19297` by `Thomas Fan`_. - |API| the default value for the `batch_size` parameter of - :class:`cluster.MiniBatchKMeans` was changed from 100 to 1024 due to - efficiency reasons. The `n_iter_` attribute of - :class:`cluster.MiniBatchKMeans` now reports the number of started epochs and - the `n_steps_` attribute reports the number of mini batches processed. - :pr:`17622` by :user:`Jérémie du Boisberranger `. + :class:`MiniBatchKMeans` was changed from 100 to 1024 due to efficiency + reasons. The `n_iter_` attribute of :class:`MiniBatchKMeans` now reports the + number of started epochs and the `n_steps_` attribute reports the number of + mini batches processed. :pr:`17622` + by :user:`Jérémie du Boisberranger `. - |API| :func:`cluster.spectral_clustering` raises an improved error when passed a `np.matrix`. :pr:`20560` by `Thomas Fan`_. @@ -227,6 +228,10 @@ Changelog :mod:`sklearn.compose` ...................... +- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`. + This flag controls the prefixing of feature names out in + :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_. + - |Enhancement| :class:`compose.ColumnTransformer` now records the output of each transformer in `output_indices_`. :pr:`18393` by :user:`Luca Bittarello `. @@ -242,25 +247,20 @@ Changelog :pr:`19244` by :user:`Ricardo `. - |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports - non-string feature names returned by any of its transformers. However, note - that ``get_feature_names`` is deprecated, use ``get_feature_names_out`` - instead. :pr:`18459` by :user:`Albert Villanova del Moral ` - and :user:`Alonso Silva Allende `. + non-string feature names returned by any of its transformers. + :pr:`18459` by :user:`Albert Villanova del Moral ` and + :user:`Alonso Silva Allende `. - |Fix| :class:`compose.TransformedTargetRegressor` now takes nD targets with an adequate transformer. :pr:`18898` by :user:`Oras Phongpanagnam `. -- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`. - This flag controls the prefixing of feature names out in - :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_. - :mod:`sklearn.covariance` ......................... - |Fix| Adds arrays check to :func:`covariance.ledoit_wolf` and - :func:`covariance.ledoit_wolf_shrinkage`. :pr:`20416` by :user:`Hugo Defois - `. + :func:`covariance.ledoit_wolf_shrinkage`. + :pr:`20416` by :user:`Hugo Defois `. - |API| Deprecates the following keys in `cv_results_`: `'mean_score'`, `'std_score'`, and `'split(k)_score'` in favor of `'mean_test_score'` @@ -308,31 +308,28 @@ Changelog - |Fix| Fixes incorrect multiple data-conversion warnings when clustering boolean data. :pr:`19046` by :user:`Surya Prakash `. -- |Fix| Fixed :func:`dict_learning`, used by - :class:`decomposition.DictionaryLearning`, to ensure determinism of the - output. Achieved by flipping signs of the SVD output which is used to - initialize the code. :pr:`18433` by :user:`Bruno Charron `. 
- -- |Fix| Fixed a bug in :class:`decomposition.MiniBatchDictionaryLearning`, - :class:`decomposition.MiniBatchSparsePCA` and - :func:`decomposition.dict_learning_online` where the update of the dictionary - was incorrect. :pr:`19198` by :user:`Jérémie du Boisberranger `. +- |Fix| Fixed :func:`dict_learning`, used by :class:`DictionaryLearning`, to + ensure determinism of the output. Achieved by flipping signs of the SVD + output which is used to initialize the code. + :pr:`18433` by :user:`Bruno Charron `. -- |Fix| Fixed a bug in :class:`decomposition.DictionaryLearning`, - :class:`decomposition.SparsePCA`, - :class:`decomposition.MiniBatchDictionaryLearning`, - :class:`decomposition.MiniBatchSparsePCA`, - :func:`decomposition.dict_learning` and - :func:`decomposition.dict_learning_online` where the restart of unused atoms - during the dictionary update was not working as expected. :pr:`19198` by +- |Fix| Fixed a bug in :class:`MiniBatchDictionaryLearning`, + :class:`MiniBatchSparsePCA` and :func:`dict_learning_online` where the + update of the dictionary was incorrect. :pr:`19198` by :user:`Jérémie du Boisberranger `. +- |Fix| Fixed a bug in :class:`DictionaryLearning`, :class:`SparsePCA`, + :class:`MiniBatchDictionaryLearning`, :class:`MiniBatchSparsePCA`, + :func:`dict_learning` and :func:`dict_learning_online` where the restart of + unused atoms during the dictionary update was not working as expected. + :pr:`19198` by :user:`Jérémie du Boisberranger `. + - |API| In :class:`decomposition.DictionaryLearning`, :class:`decomposition.MiniBatchDictionaryLearning`, - :func:`decomposition.dict_learning` and - :func:`decomposition.dict_learning_online`, `transform_alpha` will be equal - to `alpha` instead of 1.0 by default starting from version 1.2 :pr:`19159` by - :user:`Benoît Malézieux `. + :func:`dict_learning` and :func:`dict_learning_online`, + `transform_alpha` will be equal to `alpha` instead of 1.0 by default + starting from version 1.2 + :pr:`19159` by :user:`Benoît Malézieux `. - |API| Rename variable names in :class:`KernelPCA` to improve readability. `lambdas_` and `alphas_` are renamed to `eigenvalues_` @@ -371,9 +368,9 @@ Changelog :class:`ensemble.StackingClassifier` and :class:`ensemble.StackingRegressor`. :pr:`19564` by `Thomas Fan`_. -- |Enhancement| Added Poisson criterion to - :class:`ensemble.RandomForestRegressor`. :pr:`19836` by :user:`Brian Sun - `. +- |Enhancement| Documented and tested support of the Poisson criterion for + :class:`ensemble.RandomForestRegressor`. :pr:`19836` by + :user:`Brian Sun `. - |Fix| Do not allow to compute out-of-bag (OOB) score in :class:`ensemble.RandomForestClassifier` and @@ -387,7 +384,7 @@ Changelog to avoid underflows. :pr:`10096` by :user:`Fenil Suchak `. -- |Fix| Fixed the range of the argument ``max_samples`` to be ``(0.0, 1.0]`` +- |Fix| Fixed the range of the argument max_samples to be (0.0, 1.0] in :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, where `max_samples=1.0` is interpreted as using all `n_samples` for bootstrapping. :pr:`20159` by @@ -438,10 +435,10 @@ Changelog :user:`Gil Rutter `, and :user:`Adrin Jalali `. - |FIX| Fix a bug in :func:`isotonic.isotonic_regression` where the - `sample_weight` passed by a user were overwritten during ``fit``. + `sample_weight` passed by a user were overwritten during the fit. :pr:`20515` by :user:`Carsten Allefeld `. 
-- |Fix| Change :func:`feature_selection.SequentialFeatureSelector` to +- |Fix| Change :func:`feature_selection.SequentialFeatureSelection` to allow for unsupervised modelling so that the `fit` signature need not do any `y` validation and allow for `y=None`. :pr:`19568` by :user:`Shyam Desai `. @@ -459,19 +456,20 @@ Changelog ......................... - |Enhancement| Add `max_samples` parameter in - :func:`inspection.permutation_importance`. It enables to draw a subset of the - samples to compute the permutation importance. This is useful to keep the - method tractable when evaluating feature importance on large datasets. + :func:`inspection._permutation_importance`. It enables to draw a subset of + the samples to compute the permutation importance. This is useful to + keep the method tractable when evaluating feature importance on + large datasets. :pr:`20431` by :user:`Oliver Pfaffel `. - |Enhancement| Add kwargs to format ICE and PD lines separately in partial - dependence plots :func:`inspection.plot_partial_dependence` and - :meth:`inspection.PartialDependenceDisplay.plot`. :pr:`19428` by :user:`Mehdi - Hamoumi `. + dependence plots :func:`~sklearn.inspection.plot_partial_dependence` and + :meth:`~sklearn.inspection.PartialDependenceDisplay.plot`. + :pr:`19428` by :user:`Mehdi Hamoumi `. - |Fix| Allow multiple scorers input to - :func:`inspection.permutation_importance`. :pr:`19411` by :user:`Simona - Maggio `. + :func:`~sklearn.inspection.permutation_importance`. + :pr:`19411` by :user:`Simona Maggio `. - |API| :class:`inspection.PartialDependenceDisplay` exposes a class method: :func:`~inspection.PartialDependenceDisplay.from_estimator`. @@ -481,10 +479,10 @@ Changelog :mod:`sklearn.kernel_approximation` ................................... -- |Fix| Fix a bug in :class:`kernel_approximation.Nystroem` - where the attribute `component_indices_` did not correspond to the subset of - sample indices used to generate the approximated kernel. :pr:`20554` by - :user:`Xiangyin Kong `. +- |Fix| Fix a bug in :class:`sklearn.kernel_approximation.Nystroem` + where the attribute `component_indices_` did not correspond + to the subset of samples indices used to generate the approximated kernel. + :pr:`20554` by :user:`Xiangyin Kong `. :mod:`sklearn.linear_model` ........................... @@ -506,30 +504,33 @@ Changelog :pr:`16449` by :user:`Christian Lorentzen `. - |Feature| Added new solver `lbfgs` (available with `solver="lbfgs"`) - and `positive` argument to :class:`linear_model.Ridge`. When `positive` is - set to `True`, forces the coefficients to be positive (only supported by - `lbfgs`). :pr:`20231` by :user:`Toshihiro Nakae `. + and `positive` argument to class:`linear_model.Ridge`. + When `positive` is set to True, forces the coefficients to be positive + (only supported by `lbfgs`). + :pr:`20231` by :user:`Toshihiro Nakae `. - |Efficiency| The implementation of :class:`linear_model.LogisticRegression` has been optimised for dense matrices when using `solver='newton-cg'` and `multi_class!='multinomial'`. :pr:`19571` by :user:`Julien Jerphanion `. +- |Efficiency| The implementation of `fit` for `PolynomialFeatures` transformer + is now faster. This is especially noticeable on large sparse input. + :pr:`19734` by :user:`Fred Robinson `. + - |Enhancement| `fit` method preserves dtype for numpy.float32 in - :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, - :class:`linear_model.LassoLars`, :class:`linear_model.LarsCV` and - :class:`linear_model.LassoLarsCV`. 
:pr:`20155` by :user:`Takeshi Oura - `. + :class:`Lars`, :class:`LassoLars`, :class:`LassoLars`, :class:`LarsCV` and + :class:`LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura `. - |Enhancement| Validate user-supplied gram matrix passed to linear models via the `precompute` argument. :pr:`19004` by :user:`Adam Midvidy `. -- |Fix| :meth:`linear_model.ElasticNet.fit` no longer modifies `sample_weight` - in place. :pr:`19055` by `Thomas Fan`_. +- |Fix| :meth:`ElasticNet.fit` no longer modifies `sample_weight` in place. + :pr:`19055` by `Thomas Fan`_. -- |Fix| :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet` no - longer have a `dual_gap_` not corresponding to their objective. :pr:`19172` - by :user:`Mathurin Massias ` +- |Fix| :class:`Lasso`, :class:`ElasticNet` no longer have a `dual_gap_` + not corresponding to their objective. :pr:`19172` by + :user:`Mathurin Massias ` - |Fix| `sample_weight` are now fully taken into account in linear models when `normalize=True` for both feature centering and feature @@ -542,9 +543,13 @@ Changelog a model perfectly on some datasets when `residual_threshold=0`. :pr:`19499` by :user:`Gregory Strubel `. -- |Fix| Sample weight invariance for :class:`linear_model.Ridge` was fixed in - :pr:`19616` by :user:`Oliver Grisel ` and :user:`Christian Lorentzen - `. +- |Fix| Sample weight invariance for class:`Ridge` was fixed in :pr:`19616` by + :user:`Oliver Grisel ` and + :user:`Christian Lorentzen `. + +- |Fix| The :func:`preprocessing.StandardScaler.inverse_transform` method + now raises error when the input data is 1D. + :pr:`19752` by :user:`Zhehao Liu `. - |Fix| The dictionary `params` in :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` should only contain parameter of the @@ -552,54 +557,60 @@ Changelog :pr:`19391` by :user:`Shao Yang Hong `. - |API| Raise a warning in :class:`linear_model.RANSACRegressor` that from - version 1.2, `min_samples` need to be set explicitly for models other than - :class:`linear_model.LinearRegression`. :pr:`19390` by :user:`Shao Yang Hong - `. + version 1.2, `min_samples` need to be set explicitly for model other than + :class:`linear_model.LinearRegression`. + :pr:`19390` by :user:`Shao Yang Hong `. - |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` - is deprecated and will be removed in 1.2. Motivation for this deprecation: - ``normalize`` parameter did not take any effect if ``fit_intercept`` was set - to False and therefore was deemed confusing. The behavior of the deprecated - ``LinearModel(normalize=True)`` can be reproduced with a - :class:`~sklearn.pipeline.Pipeline` with ``LinearModel`` (where - ``LinearModel`` is :class:`~linear_model.LinearRegression`, - :class:`~linear_model.Ridge`, :class:`~linear_model.RidgeClassifier`, - :class:`~linear_model.RidgeCV` or :class:`~linear_model.RidgeClassifierCV`) - as follows: ``make_pipeline(StandardScaler(with_mean=False), - LinearModel())``. The ``normalize`` parameter in - :class:`~linear_model.LinearRegression` was deprecated in :pr:`17743` by + is deprecated and will be removed in 1.2. + Motivation for this deprecation: ``normalize`` parameter did not take any + effect if ``fit_intercept`` was set to False and therefore was deemed + confusing. 
+ The behavior of the deprecated LinearModel(normalize=True) can be + reproduced with :class:`~sklearn.pipeline.Pipeline` with + :class:`~sklearn.preprocessing.LinearModel` (where LinearModel is + LinearRegression, Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV) as + follows: + make_pipeline(StandardScaler(with_mean=False), LinearModel()). + The ``normalize`` parameter in :class:`linear_model.LinearRegression` was + deprecated in :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Alexandre Gramfort `. - Same for :class:`~linear_model.Ridge`, - :class:`~linear_model.RidgeClassifier`, :class:`~linear_model.RidgeCV`, and - :class:`~linear_model.RidgeClassifierCV`, in: :pr:`17772` by :user:`Maria - Telenczuk ` and :user:`Alexandre Gramfort `. Same for - :class:`~linear_model.BayesianRidge`, :class:`~linear_model.ARDRegression` - in: :pr:`17746` by :user:`Maria Telenczuk `. Same for - :class:`~linear_model.Lasso`, :class:`~linear_model.LassoCV`, - :class:`~linear_model.ElasticNet`, :class:`~linear_model.ElasticNetCV`, - :class:`~linear_model.MultiTaskLasso`, - :class:`~linear_model.MultiTaskLassoCV`, - :class:`~linear_model.MultiTaskElasticNet`, - :class:`~linear_model.MultiTaskElasticNetCV`, in: :pr:`17785` by :user:`Maria - Telenczuk ` and :user:`Alexandre Gramfort `. - -- |API| The ``normalize`` parameter of - :class:`~linear_model.OrthogonalMatchingPursuit` and - :class:`~linear_model.OrthogonalMatchingPursuitCV` will default to False in - 1.2 and will be removed in 1.4. :pr:`17750` by :user:`Maria Telenczuk - ` and :user:`Alexandre Gramfort `. Same for - :class:`~linear_model.Lars` :class:`~linear_model.LarsCV` - :class:`~linear_model.LassoLars` :class:`~linear_model.LassoLarsCV` - :class:`~linear_model.LassoLarsIC`, in :pr:`17769` by :user:`Maria Telenczuk - ` and :user:`Alexandre Gramfort `. + Same for :class:`linear_model.Ridge`, :class:`linear_model.RidgeClassifier`, + :class:`linear_model.RidgeCV`, and :class:`linear_model.RidgeClassifierCV`, + in: :pr:`17772` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. + Same for :class:`linear_model.BayesianRidge`, + :class:`linear_model.ARDRegression` in: + :pr:`17746` by :user:`Maria Telenczuk `. + Same for :class:`linear_model.Lasso`, :class:`linear_model.LassoCV`, + :class:`linear_model.ElasticNet`, :class:`linear_model.ElasticNetCV`, + :class:`linear_model.MultiTaskLasso`, :class:`linear_model.MultiTaskLassoCV`, + :class:`linear_model.MultiTaskElasticNet`, + :class:`linear_model.MultiTaskElasticNetCV`, in: + :pr:`17785` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. + +- The ``normalize`` parameter of :class:`linear_model.OrthogonalMatchingPursuit` + :class:`linear_model.OrthogonalMatchingPursuitCV` will default to + False in 1.2 and will be removed in 1.4. + :pr:`17750` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. + Same for :class:`linear_model.Lars` + :class:`linear_model.LarsCV` :class:`linear_model.LassoLars` + :class:`linear_model.LassoLarsCV` :class:`linear_model.LassoLarsIC`, + in :pr:`17769` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. - |API| Keyword validation has moved from `__init__` and `set_params` to `fit` for the following estimators conforming to scikit-learn's conventions: - :class:`~linear_model.SGDClassifier`, - :class:`~linear_model.SGDRegressor`, - :class:`~linear_model.SGDOneClassSVM`, - :class:`~linear_model.PassiveAggressiveClassifier`, and - :class:`~linear_model.PassiveAggressiveRegressor`. 
+ :class:`linear_model.SGDClassifier`, + :class:`linear_model.SparseSGDClassifier`, + :class:`linear_model.SGDRegressor`, + :class:`linear_model.SparseSGDRegressor`, + :class:`linear_model.SGDOneClassSVM`, + :class:`linear_model.SparseSGDOneClassSVM`, + :class:`linear_model.PassiveAggressiveClassifier`, + :class:`linear_model.PassiveAggressiveRegressor`. :pr:`20683` by `Guillaume Lemaitre`_. :mod:`sklearn.manifold` @@ -627,6 +638,11 @@ Changelog - |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for quantile regression. :pr:`19415` by :user:`Xavier Dupré ` and :user:`Oliver Grisel `. + +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores` + and :func:`metrics.specificity_score`. + :pr:`19556` by :user:`Hao Chun Chang ` + and :user:`Pawel Kranzberg `. - |Feature| :func:`metrics.d2_tweedie_score` calculates the D^2 regression score for Tweedie deviances with power parameter ``power``. This is a @@ -656,9 +672,9 @@ Changelog :pr:`18328` by :user:`Albert Villanova del Moral ` and :user:`Alonso Silva Allende `. -- |Fix| avoid overflow in :func:`metrics.cluster.adjusted_rand_score` with - large amount of data. :pr:`20312` by :user:`Divyanshu Deoli - `. +- |Fix| avoid overflow in :func:`metrics.cluster.adjust_rand_score` with large + amount of data. + :pr:`20312` by :user:`Divyanshu Deoli `. - |API| :class:`metrics.ConfusionMatrixDisplay` exposes two class methods :func:`~metrics.ConfusionMatrixDisplay.from_estimator` and @@ -742,7 +758,7 @@ Changelog - |FIX| :class:`neighbors.NearestNeighbors`, :class:`neighbors.KNeighborsClassifier`, :class:`neighbors.RadiusNeighborsClassifier`, :class:`neighbors.KNeighborsRegressor` - and :class:`neighbors.RadiusNeighborsRegressor` do not validate `weights` in + and :class:`neighbors.RadiusNeighborsRegressor` does not validate `weights` in `__init__` and validates `weights` in `fit` instead. :pr:`20072` by :user:`Juan Carlos Alfaro Jiménez `. @@ -761,8 +777,9 @@ Changelog ....................... - |API| The `predict_proba` and `predict_log_proba` methods of the - :class:`pipeline.Pipeline` now support passing prediction kwargs to the final - estimator. :pr:`19790` by :user:`Christopher Flynn `. + :class:`Pipeline` class now support passing prediction kwargs to + the final estimator. + :pr:`19790` by :user:`Christopher Flynn `. :mod:`sklearn.preprocessing` ............................ @@ -790,22 +807,13 @@ Changelog a tuple to `degree`, i.e. `degree=(min_degree, max_degree)`. :pr:`20250` by :user:`Christian Lorentzen `. -- |Efficiency| :class:`preprocessing.StandardScaler` is faster and more memory +- |Efficiency| `preprocessing.StandardScaler` is faster and more memory efficient. :pr:`20652` by `Thomas Fan`_. - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. :pr:`19934` by :user:`Gleb Levitskiy `. -- |Efficiency| The implementation of `fit` for - :class:`preprocessing.PolynomialFeatures` transformer is now faster. This is - especially noticeable on large sparse input. :pr:`19734` by :user:`Fred - Robinson `. - -- |Fix| The :func:`preprocessing.StandardScaler.inverse_transform` method - now raises error when the input data is 1D. :pr:`19752` by :user:`Zhehao Liu - `. - - |Fix| :func:`preprocessing.scale`, :class:`preprocessing.StandardScaler` and similar scalers detect near-constant features to avoid scaling them to very large values. 
This problem happens in particular when using a scaler on @@ -818,7 +826,7 @@ Changelog correctly handles integer dtypes. :pr:`19356` by :user:`makoeppel`. - |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` is not - supporting sparse matrix and raises the appropriate error message. + supporting sparse matrix and raise the appropriate error message. :pr:`19879` by :user:`Guillaume Lemaitre `. - |Fix| The `fit` method of :class:`preprocessing.OrdinalEncoder` will not @@ -831,14 +839,14 @@ Changelog (`np.float64` or `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. -- |Fix| :class:`preprocessing.FunctionTransformer` does not set `n_features_in_` - based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_. - - |API| The `n_input_features_` attribute of :class:`preprocessing.PolynomialFeatures` is deprecated in favor of `n_features_in_` and will be removed in 1.2. :pr:`20240` by :user:`Jérémie du Boisberranger `. +- |Fix| :class:`preprocessing.FunctionTransformer` does not set `n_features_in_` + based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_. + :mod:`sklearn.svm` ................... @@ -882,8 +890,8 @@ Changelog :pr:`19948` by `Joel Nothman`_. - |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__sklearn_is_fitted__`` if available, instead of checking for attributes - ending with an underscore. This also makes :class:`pipeline.Pipeline` and + ``__sklearn_is_fitted__`` if available, instead of checking for attributes ending with + an underscore. This also makes :class:`Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. @@ -891,9 +899,8 @@ Changelog precision of the computed variance was very poor when the real variance is exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger `. -- |Fix| The docstrings of propreties that are decorated with - :func:`utils.deprecated` are now properly wrapped. :pr:`20385` by `Thomas - Fan`_. +- |Fix| Propreties that are decorated with :func:`utils.deprecated` correctly + wraps the property's docstring. :pr:`20385` by `Thomas Fan`_. - |Fix| :func:`utils.stats._weighted_percentile` now correctly ignores zero-weighted observations smaller than the smallest observation with @@ -916,10 +923,6 @@ Changelog `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. -- |Fix| Support for `np.matrix` is deprecated in - :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in - 1.2. :pr:`20165` by `Thomas Fan`_. - - |API| :func:`utils._testing.assert_warns` and :func:`utils._testing.assert_warns_message` are deprecated in 1.0 and will be removed in 1.2. Used `pytest.warns` context manager instead. Note that @@ -930,64 +933,17 @@ Changelog now deprecated. Use `scipy.sparse.csgraph.shortest_path` instead. :pr:`20531` by `Tom Dupre la Tour`_. +:mod:`sklearn.validation` +......................... + +- |Fix| Support for `np.matrix` is deprecated in + :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in + 1.2. :pr:`20165` by `Thomas Fan`_. + Code and Documentation Contributors ----------------------------------- Thanks to everyone who has contributed to the maintenance and improvement of the project since version 0.24, including: -Abdulelah S. Al Mesfer, Abhinav Gupta, Adam J. 
Stewart, Adam Li, Adam Midvidy, -Adrian Garcia Badaracco, Adrian Sadłocha, Adrin Jalali, Agamemnon Krasoulis, -Alberto Rubiales, Albert Thomas, Albert Villanova del Moral, Alek Lefebvre, -Alessia Marcolini, Alexandr Fonari, Alihan Zihna, Aline Ribeiro de Almeida, -Amanda, Amanda Dsouza, Amol Deshmukh, Ana Pessoa, Anavelyz, Andreas Mueller, -Andrew Delong, Ashish, Ashvith Shetty, Atsushi Nukariya, Avi Gupta, Ayush -Singh, baam, BaptBillard, Benjamin Pedigo, Bertrand Thirion, Bharat -Raghunathan, bmalezieux, Brian Rice, Brian Sun, Bruno Charron, Bryan Chen, -bumblebee, caherrera-meli, Carsten Allefeld, CeeThinwa, Chiara Marmo, -chrissobel, Christian Lorentzen, Christopher Yeh, Chuliang Xiao, Clément -Fauchereau, cliffordEmmanuel, Conner Shen, Connor Tann, David Dale, David Katz, -David Poznik, Divyanshu Deoli, dmallia17, Dmitry Kobak, DS_anas, Eduardo -Jardim, EdwinWenink, EL-ATEIF Sara, Eleni Markou, Eric Fiegel, Erich Schubert, -Ezri-Mudde, Fatos Morina, Felipe Rodrigues, Felix Hafner, Fenil Suchak, -flyingdutchman23, Flynn, Fortune Uwha, Francois Berenger, Frankie Robertson, -Frans Larsson, Frederick Robinson, Gabriel S Vicente, Gael Varoquaux, genvalen, -Geoffrey Thomas, geroldcsendes, Gleb Levitskiy, Glen, Glòria Macià Muñoz, -gregorystrubel, groceryheist, Guillaume Lemaitre, guiweber, Haidar Almubarak, -Hans Moritz Günther, Haoyin Xu, Harris Mirza, Harry Wei, Harutaka Kawamura, -Hassan Alsawadi, Helder Geovane Gomes de Lima, Hugo DEFOIS, Igor Ilic, Ikko -Ashimine, Isaack Mungui, Ishaan Bhat, Ishan Mishra, Iván Pulido, iwhalvic, -Jack Liu, James Alan Preiss, James Budarz, James Lamb, Jannik, Jeff Zhao, -Jennifer Maldonado, Jérémie du Boisberranger, Jesse Lima, Jianzhu Guo, -jnboehm, Joel Nothman, JohanWork, John Paton, Jonathan Schneider, Jon Crall, -Jon Haitz Legarreta Gorroño, Joris Van den Bossche, José Manuel Nápoles -Duarte, Juan Carlos Alfaro Jiménez, Juan Martin Loyola, Julien Jerphanion, -Julio Batista Silva, julyrashchenko, JVM, Kadatatlu Kishore, Karen Palacio, Kei -Ishikawa, kmatt10, kobaski, Kot271828, Kunj, KurumeYuta, kxytim, lacrosse91, -LalliAcqua, Laveen Bagai, Leonardo Rocco, Leonardo Uieda, Leopoldo Corona, Loic -Esteve, LSturtew, Luca Bittarello, Luccas Quadros, Lucy Jiménez, Lucy Liu, -ly648499246, Mabu Manaileng, makoeppel, Marco Gorelli, Maren Westermann, -Mariangela, Maria Telenczuk, marielaraj, Martin Hirzel, Mateo Noreña, Mathieu -Blondel, Mathis Batoul, mathurinm, Matthew Calcote, Maxime Prieur, Maxwell, -Mehdi Hamoumi, Mehmet Ali Özer, Miao Cai, Michal Karbownik, michalkrawczyk, -Mitzi, mlondschien, Mohamed Haseeb, Mohamed Khoualed, Muhammad Jarir Kanji, -murata-yu, Nadim Kawwa, Nanshan Li, naozin555, Nate Parsons, Neal Fultz, Nic -Annau, Nicolas Hug, Nicolas Miller, Nico Stefani, Nigel Bosch, Nodar -Okroshiashvili, Norbert Preining, novaya, Ogbonna Chibuike Stephen, OGordon100, -Oliver Pfaffel, Olivier Grisel, Oras Phongpanangam, Pablo Duque, Pablo -Ibieta-Jimenez, Patric Lacouth, Paulo S. 
Costa, Paweł Olszewski, Peter Dye, -PierreAttard, Pierre-Yves Le Borgne, PranayAnchuri, Prince Canuma, putschblos, -qdeffense, RamyaNP, ranjanikrishnan, Ray Bell, Rene Jean Corneille, Reshama -Shaikh, ricardojnf, RichardScottOZ, Rodion Martynov, Rohan Paul, Roman Lutz, -Roman Yurchak, Samuel Brice, Sandy Khosasi, Sean Benhur J, Sebastian Flores, -Sebastian Pölsterl, Shao Yang Hong, shinehide, shinnar, shivamgargsya, -Shooter23, Shuhei Kayawari, Shyam Desai, simonamaggio, Sina Tootoonian, -solosilence, Steven Kolawole, Steve Stagg, Surya Prakash, swpease, Sylvain -Marié, Takeshi Oura, Terence Honles, TFiFiE, Thomas A Caswell, Thomas J. Fan, -Tim Gates, TimotheeMathieu, Timothy Wolodzko, Tim Vink, t-jakubek, t-kusanagi, -tliu68, Tobias Uhmann, tom1092, Tomás Moreyra, Tomás Ronald Hughes, Tom -Dupré la Tour, Tommaso Di Noto, Tomohiro Endo, Toshihiro NAKAE, tsuga, Uttam -kumar, vadim-ushtanit, Vangelis Gkiastas, Venkatachalam N, Vilém Zouhar, -Vinicius Rios Fuck, Vlasovets, waijean, Whidou, xavier dupré, xiaoyuchai, -Yasmeen Alsaedy, yoch, Yosuke KOBAYASHI, Yu Feng, YusukeNagasaka, yzhenman, -Zero, ZeyuSun, ZhaoweiWang, Zito, Zito Relova +TODO: update at the time of the release. From 71cf7cf2c97daed456a230e2d0f029be5a90d705 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 12:59:36 +0200 Subject: [PATCH 058/127] Update v1.0.rst --- doc/whats_new/v1.0.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 8dcc12268c0ec..e8559c4908805 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -638,11 +638,6 @@ Changelog - |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for quantile regression. :pr:`19415` by :user:`Xavier Dupré ` and :user:`Oliver Grisel `. - -- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores` - and :func:`metrics.specificity_score`. - :pr:`19556` by :user:`Hao Chun Chang ` - and :user:`Pawel Kranzberg `. - |Feature| :func:`metrics.d2_tweedie_score` calculates the D^2 regression score for Tweedie deviances with power parameter ``power``. This is a From fa77a9695b4ba0c3cb732696c5e4b5dafe650345 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 13:05:26 +0200 Subject: [PATCH 059/127] Update v1.1.rst --- doc/whats_new/v1.1.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index fba40e25a9e7e..79a3400241e54 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -38,6 +38,14 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123456 is the *pull request* number, not the issue number. +:mod:`sklearn.metrics` +...................... + +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores` + and :func:`metrics.specificity_score`. + :pr:`19556` by :user:`Hao Chun Chang ` + and :user:`Pawel Kranzberg `. + :mod:`sklearn.utils` .................... 
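For context on the entry just moved into v1.1.rst: on binary targets the proposed metrics.specificity_score is simply the recall of the negative class, so its output can already be cross-checked with released scikit-learn. A minimal sketch, assuming only existing APIs (specificity_score itself only exists on this branch):

import numpy as np
from sklearn.metrics import confusion_matrix, recall_score

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0, 0])

# For binary data, confusion_matrix().ravel() yields TN, FP, FN, TP.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(tn / (tn + fp))                             # 0.666... (TN=2, FP=1)
print(recall_score(y_true, y_pred, pos_label=0))  # 0.666..., the same quantity
# specificity_score(y_true, y_pred) from this branch is expected to match.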
From dc81e7ef5829cb7fca42dc05cb607705af31f2c5 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 14:01:49 +0200 Subject: [PATCH 060/127] Update _classification.py Update docs of tpr_fpr_tnr_fnr_scores and specificity_score --- sklearn/metrics/_classification.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index d39a69ece411f..fdf71161c9da0 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2664,17 +2664,17 @@ def tpr_fpr_tnr_fnr_scores( ): """Compute TPR, FPR, TNR, FNR for each class. - The TPR is the ratio `TP / (TP + FN)` where `tp` is the number of - true positives and `fn` the number of false negatives. + True Positive Rate (TPR) is the ratio `TP / (TP + FN)` where `TP` + is the number of true positives and `FN` the number of false negatives. - The FPR is the ratio `FP / (TN + FP)` where `tn` is the number of - true negatives and `FP` the number of false positives. + False Positive Rate (FPR) is the ratio `FP / (TN + FP)` where `TN` + is the number of true negatives and `FP` the number of false positives. - The TNR is the ratio `TN / (TN + FP)` where `tn` is the number of - true negatives and `FP` the number of false positives. + True Negative Rate (TNR) is the ratio `TN / (TN + FP)` where `TN` + is the number of true negatives and `FP` the number of false positives. - The FNR is the ratio `FN / (TP + FN)` where `tp` is the number of - true positives and `FN` the number of false negatives. + False Negative Rate (FNR) is the ratio `FN / (TP + FN)` where `TP` + is the number of true positives and `FN` the number of false negatives. If `pos_label is None` and in binary classification, this function returns the true positive rate, false positive rate, true negative rate @@ -2809,20 +2809,20 @@ def tpr_fpr_tnr_fnr_scores( labels=labels, samplewise=samplewise, ) - tn_sum = MCM[:, 0, 0] + tp_sum = MCM[:, 1, 1] fp_sum = MCM[:, 0, 1] + tn_sum = MCM[:, 0, 0] fn_sum = MCM[:, 1, 0] - tp_sum = MCM[:, 1, 1] + pos_sum = tp_sum + fn_sum neg_sum = tn_sum + fp_sum - pos_sum = fn_sum + tp_sum if average == "micro": tp_sum = np.array([tp_sum.sum()]) fp_sum = np.array([fp_sum.sum()]) tn_sum = np.array([tn_sum.sum()]) fn_sum = np.array([fn_sum.sum()]) - neg_sum = np.array([neg_sum.sum()]) pos_sum = np.array([pos_sum.sum()]) + neg_sum = np.array([neg_sum.sum()]) # Divide, and on zero-division, set scores and/or warn according to # zero_division: @@ -2859,10 +2859,10 @@ def tpr_fpr_tnr_fnr_scores( if average is not None: assert average != "binary" or len(fpr) == 1 + tpr = np.average(tpr, weights=weights) fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) - tpr = np.average(tpr, weights=weights) return tpr, fpr, tnr, fnr @@ -2881,7 +2881,7 @@ def specificity_score( The specificity is the ratio `TN / (TN + FP)` where `TN` is the number of true negatives and `FP` is the number of false positives. The specificity is intuitively the ability of the classifier to find - all the negative samples. + all the negative samples. It is also called selectivity. The best value is 1 and the worst value is 0. 
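The reordering in the patch above pulls the per-class counts straight out of multilabel_confusion_matrix, whose per-class 2x2 blocks are laid out as [[TN, FP], [FN, TP]]. A minimal hand check of the four rates, assuming only released scikit-learn APIs (these are the values tpr_fpr_tnr_fnr_scores from this branch is expected to reproduce):

from sklearn.metrics import multilabel_confusion_matrix

y_true = [0, 1, 2, 2, 0, 1]
y_pred = [0, 2, 2, 1, 0, 1]

MCM = multilabel_confusion_matrix(y_true, y_pred)
# Same indexing as in the diff: [:, 0, 0] = TN, [:, 0, 1] = FP,
# [:, 1, 0] = FN, [:, 1, 1] = TP.
tn, fp, fn, tp = MCM[:, 0, 0], MCM[:, 0, 1], MCM[:, 1, 0], MCM[:, 1, 1]

tpr = tp / (tp + fn)   # [1.0, 0.5,  0.5 ]
fpr = fp / (tn + fp)   # [0.0, 0.25, 0.25]
tnr = tn / (tn + fp)   # [1.0, 0.75, 0.75]
fnr = fn / (tp + fn)   # [0.0, 0.5,  0.5 ]
# tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) should return these
# four arrays, modulo the zero_division handling shown above.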
From 2dc9b25c80c50ae11809a5cb784ce18ac263c04c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 18:39:30 +0200 Subject: [PATCH 061/127] Update _classification.py Update docs of tpr_fpr_tnr_fnr_scores and specificity_score --- sklearn/metrics/_classification.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index fdf71161c9da0..74ce1405f20bd 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2662,18 +2662,20 @@ def tpr_fpr_tnr_fnr_scores( sample_weight=None, zero_division="warn", ): - """Compute TPR, FPR, TNR, FNR for each class. + """Compute the TPR, FPR, TNR, FNR for each class. - True Positive Rate (TPR) is the ratio `TP / (TP + FN)` where `TP` + .. versionadded:: 1.1 + + The true positive rate (TPR) is the ratio `TP / (TP + FN)` where `TP` is the number of true positives and `FN` the number of false negatives. - False Positive Rate (FPR) is the ratio `FP / (TN + FP)` where `TN` + The false positive rate (FPR) is the ratio `FP / (TN + FP)` where `TN` is the number of true negatives and `FP` the number of false positives. - True Negative Rate (TNR) is the ratio `TN / (TN + FP)` where `TN` + The true negative rate (TNR) is the ratio `TN / (TN + FP)` where `TN` is the number of true negatives and `FP` the number of false positives. - False Negative Rate (FNR) is the ratio `FN / (TP + FN)` where `TP` + The false negative rate (FNR) is the ratio `FN / (TP + FN)` where `TP` is the number of true positives and `FN` the number of false negatives. If `pos_label is None` and in binary classification, this function @@ -2876,7 +2878,9 @@ def specificity_score( sample_weight=None, zero_division="warn", ): - """Compute specificity, also known as true negative rate. + """Compute the specificity, also known as the true negative rate (TNR). + + .. versionadded:: 1.1 The specificity is the ratio `TN / (TN + FP)` where `TN` is the number of true negatives and `FP` is the number of false positives. @@ -2944,13 +2948,14 @@ def specificity_score( ------- specificity : float (if average is not None) or array of float of shape (n_unique_labels,) - Specificity of the positive class in binary classification or weighted - average of the specificity of each class for the multiclass task. + The specificity of the positive class in binary classification or + weighted average of the specificity of each class for the multiclass + task. See Also -------- - classification_report, precision_recall_fscore_support, recall_score, - balanced_accuracy_score, multilabel_confusion_matrix, + classification_report, precision_recall_fscore_support, precision_score, + recall_score, balanced_accuracy_score, multilabel_confusion_matrix, tpr_fpr_tnr_fnr_scores Notes From 23f6d01e4cf76a44eeedcb9f47fa153fbde75e09 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 18:48:13 +0200 Subject: [PATCH 062/127] Fix linting --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 74ce1405f20bd..b3dbf4db01d52 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2967,7 +2967,7 @@ def specificity_score( References ---------- .. [1] `Wikipedia entry for sensitivity and specificity - `_. 
+ `_ Examples -------- From c7084c4995ff5a886176393f2dc14129f5a00ff8 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 19:07:11 +0200 Subject: [PATCH 063/127] Update docs in _classification.py --- sklearn/metrics/_classification.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b3dbf4db01d52..76684e0c84cb7 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2747,18 +2747,18 @@ def tpr_fpr_tnr_fnr_scores( Returns ------- - tpr : float (if average is not None), \ - or ndarray of shape (n_unique_labels,) + tpr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 - fpr : float (if average is not None), \ - or ndarray of shape (n_unique_labels,) + fpr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 - tnr : float (if average is not None), \ - or ndarray of shape (n_unique_labels,) + tnr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 - fnr : float (if average is not None), \ - or ndarray of shape (n_unique_labels,) - The number of occurrences of each label in `y_true`. + fnr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + + See Also + -------- + classification_report, precision_recall_fscore_support, precision_score, + recall_score, balanced_accuracy_score, multilabel_confusion_matrix References ---------- @@ -2946,9 +2946,8 @@ def specificity_score( Returns ------- - specificity : float (if average is not None) or array of float of shape - (n_unique_labels,) - The specificity of the positive class in binary classification or + specificity : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + The specificity of the positive class in binary classification or weighted average of the specificity of each class for the multiclass task. From 77421ae530dae2ef822a7e2b50eb78a57f22ace1 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 19:44:54 +0200 Subject: [PATCH 064/127] Add npv_score --- sklearn/metrics/_classification.py | 159 +++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 76684e0c84cb7..1da48ed0a416d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2998,3 +2998,162 @@ def specificity_score( zero_division=zero_division, ) return tnr + + +def npv_score( + y_true, + y_pred, + labels=None, + pos_label=1, + average="binary", + sample_weight=None, + zero_division="warn", +): + """Compute the negative predictive value (NPV). + + .. versionadded:: 1.1 + + The NPV is the ratio `TN / (TN + FN)` where `TN` is the number of true + negatives and `FN` is the number of false negatives. + The NPV is intuitively the ability of the classifier to mark the negative + samples correctly. + + The best value is 1 and the worst value is 0. + + Parameters + ---------- + y_true : 1d array-like, or label indicator array / sparse matrix + Ground truth (correct) target values. + y_pred : 1d array-like, or label indicator array / sparse matrix + Estimated targets as returned by a classifier. + labels : array-like, default=None + The set of labels to include when `average != "binary"`, and their + order if `average is None`. 
Labels present in the data can be + excluded, for example to calculate a multiclass average ignoring a + majority negative class, while labels not present in the data will + result in 0 components in a macro average. For multilabel targets, + labels are column indices. By default, all labels in `y_true` and + `y_pred` are used in sorted order. + pos_label : str or int, default=1 + The class to report if `average="binary"` and the data is binary. + If the data are multiclass or multilabel, this will be ignored; + setting `labels=[pos_label]` and `average != "binary"` will report + scores for that label only. + average : str, {None, "binary", "micro", "macro", "samples", "weighted"} \ + default="binary" + This parameter is required for multiclass/multilabel targets. + If `None`, the scores for each class are returned. Otherwise, this + determines the type of averaging performed on the data: + `"binary"`: + Only report results for the class specified by `pos_label`. + This is applicable only if targets (`y_{true,pred}`) are binary. + `"micro"`: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + `"macro"`: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + `"weighted"`: + Calculate metrics for each label, and find their average weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + `"samples"`: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. + zero_division : "warn", 0 or 1, default="warn" + Sets the value to return when there is a zero division. If set to + "warn", this acts as 0, but warnings are also raised. + + Returns + ------- + NPV : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + The negative predictive value of the positive class in binary + classification or weighted average of the NPV of each class for + the multiclass task. + + See Also + -------- + precision_score, classification_report, precision_recall_fscore_support, + recall_score, balanced_accuracy_score, multilabel_confusion_matrix + + Notes + ----- + When `true negative + false negative == 0`, npv_score returns 0 and + raises `UndefinedMetricWarning`. This behavior can be modified with + `zero_division`. + + References + ---------- + .. [1] `Wikipedia entry for positive and negative predictive values + (PPV and NPV respectively) + `_ + + Examples + -------- + >>> from sklearn.metrics import npv_score + >>> y_true = [0, 1, 2, 0, 1, 2] + >>> y_pred = [0, 2, 1, 0, 0, 1] + >>> npv_score(y_true, y_pred, average='macro') + 0.70... + >>> npv_score(y_true, y_pred, average='micro') + 0.66... + >>> npv_score(y_true, y_pred, average='weighted') + 0.70... + >>> npv_score(y_true, y_pred, average=None) + array([1. , 0.5, 0.6]) + >>> y_pred = [0, 0, 0, 0, 0, 0] + >>> npv_score(y_true, y_pred, average=None) + array([0. , 0.66..., 0.66...]) + >>> npv_score(y_true, y_pred, average=None, zero_division=1) + array([1. 
, 0.66..., 0.66...]) + """ + _check_zero_division(zero_division) + + labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) + + # Calculate tn_sum, fn_sum, neg_calls_sum + samplewise = average == "samples" + MCM = multilabel_confusion_matrix( + y_true, + y_pred, + sample_weight=sample_weight, + labels=labels, + samplewise=samplewise, + ) + tp_sum = MCM[:, 1, 1] + tn_sum = MCM[:, 0, 0] + fn_sum = MCM[:, 1, 0] + pos_sum = tp_sum + fn_sum + neg_calls_sum = tn_sum + fn_sum + + if average == "micro": + tn_sum = np.array([tn_sum.sum()]) + fn_sum = np.array([fn_sum.sum()]) + neg_calls_sum = np.array([neg_calls_sum.sum()]) + + # Divide, and on zero-division, set scores and/or warn according to + # zero_division: + NPV = _prf_divide( + tn_sum, neg_calls_sum, "NPV", "negative call", average, "NPV", zero_division + ) + + # Average the results + if average == "weighted": + weights = pos_sum + if weights.sum() == 0: + zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 + # NPV is zero_division if there are no negative calls + return zero_division_value if neg_calls_sum.sum() == 0 else 0 + elif average == "samples": + weights = sample_weight + else: + weights = None + if average is not None: + assert average != "binary" or len(NPV) == 1 + NPV = np.average(NPV, weights=weights) + + return NPV From 5027a6af7b9a16bf44e358e6811952d874d2d081 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 20:09:54 +0200 Subject: [PATCH 065/127] Add npv_score --- sklearn/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 465f9504922f9..21739b99dbf25 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -37,6 +37,7 @@ from ._classification import multilabel_confusion_matrix from ._classification import tpr_fpr_tnr_fnr_scores from ._classification import specificity_score +from ._classification import npv_score from . import cluster from .cluster import adjusted_mutual_info_score From 8b8a196d5db1ef44f21f3148777cbf2b02f01262 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 20:11:11 +0200 Subject: [PATCH 066/127] Add npv_score --- doc/whats_new/v1.1.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 79a3400241e54..fd6aadaafdd3e 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -41,8 +41,8 @@ Changelog :mod:`sklearn.metrics` ...................... -- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores` - and :func:`metrics.specificity_score`. +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores`, + :func:`metrics.specificity_score` and :func:`metrics.npv_score`. :pr:`19556` by :user:`Hao Chun Chang ` and :user:`Pawel Kranzberg `. 
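The numbers in the npv_score docstring example can be reproduced without the new function, which makes the non-degenerate path easy to review. A minimal sketch, assuming only released scikit-learn APIs:

from sklearn.metrics import multilabel_confusion_matrix

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]

MCM = multilabel_confusion_matrix(y_true, y_pred)
tn, fn = MCM[:, 0, 0], MCM[:, 1, 0]

npv = tn / (tn + fn)
print(npv)                               # [1.0, 0.5, 0.6] -> average=None
print(npv.mean())                        # 0.7             -> average='macro'
print(tn.sum() / (tn.sum() + fn.sum()))  # 0.666...        -> average='micro'
# With uniform class support (2 samples per class here), the 'weighted'
# average coincides with 'macro', i.e. 0.7.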
From 483178de4245e60bd804d1c0b264aa5b2d2f3a3c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 20:14:46 +0200 Subject: [PATCH 067/127] Add npv_score --- sklearn/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 21739b99dbf25..3299bbe1706f3 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -149,6 +149,7 @@ "mutual_info_score", "ndcg_score", "normalized_mutual_info_score", + "npv_score", "pair_confusion_matrix", "pairwise_distances", "pairwise_distances_argmin", From 34849f6c5dab2880ef2a453d56006840690d70ba Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 15 Sep 2021 20:19:35 +0200 Subject: [PATCH 068/127] npv_score --- sklearn/metrics/_scorer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 51945a7b71f42..234dac839b769 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -47,6 +47,7 @@ jaccard_score, mean_absolute_percentage_error, specificity_score, + npv_score, ) from .cluster import adjusted_rand_score @@ -788,6 +789,7 @@ def make_scorer( ("f1", f1_score), ("jaccard", jaccard_score), ("specificity", specificity_score), + ("npv", npv_score), ]: SCORERS[name] = make_scorer(metric, average="binary") for average in ["macro", "micro", "samples", "weighted"]: From ecc055a0bbffd9038e46d2adc20e6a0eefa6451b Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 07:11:04 +0200 Subject: [PATCH 069/127] Add npv_score --- sklearn/metrics/tests/test_score_objects.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 72cbb9b8260d5..1bf1c0e05fd80 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -32,6 +32,7 @@ roc_auc_score, top_k_accuracy_score, specificity_score, + npv_score, ) from sklearn.metrics import cluster as cluster_module from sklearn.metrics import check_scoring @@ -107,6 +108,10 @@ "specificity_weighted", "specificity_macro", "specificity_micro", + "npv", + "npv_weighted", + "npv_macro", + "npv_micro", ] # All supervised cluster scorers (They behave like classification metric) @@ -128,6 +133,7 @@ "f1_samples", "jaccard_samples", "specificity_samples", + "npv_samples", ] REQUIRE_POSITIVE_Y_SCORERS = ["neg_mean_poisson_deviance", "neg_mean_gamma_deviance"] @@ -400,6 +406,7 @@ def test_make_scorer(): ("jaccard_micro", partial(jaccard_score, average="micro")), ("top_k_accuracy", top_k_accuracy_score), ("specificity", specificity_score), + ("npv", npv_score), ], ) def test_classification_binary_scores(scorer_name, metric): @@ -435,6 +442,9 @@ def test_classification_binary_scores(scorer_name, metric): ("specificity_weighted", partial(specificity_score, average="weighted")), ("specificity_macro", partial(specificity_score, average="macro")), ("specificity_micro", partial(specificity_score, average="micro")), + ("npv_weighted", partial(npv_score, average="weighted")), + ("npv_macro", partial(npv_score, average="macro")), + ("npv_micro", partial(npv_score, average="micro")), ], ) def test_classification_multiclass_scores(scorer_name, metric): @@ -1083,7 +1093,14 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): @pytest.mark.parametrize( "score_func", - [f1_score, precision_score, recall_score, jaccard_score, specificity_score], + [ + 
f1_score, + precision_score, + recall_score, + jaccard_score, + specificity_score, + npv_score, + ], ) def test_non_symmetric_metric_pos_label( score_func, string_labeled_classification_problem From 136130fd0fa03a9a913f3d37e3f07c2a2c151c70 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 08:34:26 +0200 Subject: [PATCH 070/127] Add `npv_score` --- sklearn/tests/test_multiclass.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 03e6ec0540606..f0094821247f1 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -20,6 +20,7 @@ from sklearn.metrics import precision_score from sklearn.metrics import recall_score from sklearn.metrics import specificity_score +from sklearn.metrics import npv_score from sklearn.svm import LinearSVC, SVC from sklearn.naive_bayes import MultinomialNB @@ -337,8 +338,8 @@ def test_ovr_fit_predict_svc(): def test_ovr_multilabel_dataset(): base_clf = MultinomialNB(alpha=1) - for au, prec, recall, specificity in zip( - (True, False), (0.51, 0.66), (0.51, 0.80), (0.66, 0.71) + for au, prec, recall, specificity, npv in zip( + (True, False), (0.51, 0.66), (0.51, 0.80), (0.66, 0.71), (0.66, 0.84) ): X, Y = datasets.make_multilabel_classification( n_samples=100, @@ -364,6 +365,9 @@ def test_ovr_multilabel_dataset(): assert_almost_equal( specificity_score(Y_test, Y_pred, average="micro"), specificity, decimal=2 ) + assert_almost_equal( + npv_score(Y_test, Y_pred, average="micro"), npv, decimal=2 + ) def test_ovr_multilabel_predict_proba(): From 60e27eddda0acbf39a955c199a04c05041ad77c0 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 08:43:40 +0200 Subject: [PATCH 071/127] Fix linting --- sklearn/tests/test_multiclass.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index f0094821247f1..933880c8027cb 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -339,7 +339,11 @@ def test_ovr_fit_predict_svc(): def test_ovr_multilabel_dataset(): base_clf = MultinomialNB(alpha=1) for au, prec, recall, specificity, npv in zip( - (True, False), (0.51, 0.66), (0.51, 0.80), (0.66, 0.71), (0.66, 0.84) + (True, False), + (0.51, 0.66), + (0.51, 0.80), + (0.66, 0.71), + (0.66, 0.84), ): X, Y = datasets.make_multilabel_classification( n_samples=100, From 20b87a225a125fe2f3e68d79dc28152560f80d88 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 09:19:13 +0200 Subject: [PATCH 072/127] Add `npv_score` --- sklearn/metrics/tests/test_common.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 249782f54efae..88897602aa915 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -51,6 +51,7 @@ from sklearn.metrics import median_absolute_error from sklearn.metrics import multilabel_confusion_matrix from sklearn.metrics import mean_pinball_loss +from sklearn.metrics import npv_score from sklearn.metrics import precision_recall_curve from sklearn.metrics import precision_score from sklearn.metrics import r2_score @@ -149,6 +150,8 @@ "binary_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="binary"), "specificity_score": specificity_score, "binary_specificity_score": partial(specificity_score, average="binary"), + 
"npv_score": npv_score, + "binary_npv_score": partial(npv_score, average="binary"), "weighted_f0.5_score": partial(fbeta_score, average="weighted", beta=0.5), "weighted_f1_score": partial(f1_score, average="weighted"), "weighted_f2_score": partial(fbeta_score, average="weighted", beta=2), @@ -159,6 +162,7 @@ tpr_fpr_tnr_fnr_scores, average="weighted" ), "weighted_specificity_score": partial(specificity_score, average="weighted"), + "weighted_npv_score": partial(npv_score, average="weighted"), "micro_f0.5_score": partial(fbeta_score, average="micro", beta=0.5), "micro_f1_score": partial(f1_score, average="micro"), "micro_f2_score": partial(fbeta_score, average="micro", beta=2), @@ -167,6 +171,7 @@ "micro_jaccard_score": partial(jaccard_score, average="micro"), "micro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="micro"), "micro_specificity_score": partial(specificity_score, average="micro"), + "micro_npv_score": partial(npv_score, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), "macro_f1_score": partial(f1_score, average="macro"), "macro_f2_score": partial(fbeta_score, average="macro", beta=2), @@ -175,6 +180,7 @@ "macro_jaccard_score": partial(jaccard_score, average="macro"), "macro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="macro"), "macro_specificity_score": partial(specificity_score, average="macro"), + "macro_npv_score": partial(npv_score, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), "samples_f1_score": partial(f1_score, average="samples"), "samples_f2_score": partial(fbeta_score, average="samples", beta=2), @@ -185,6 +191,7 @@ tpr_fpr_tnr_fnr_scores, average="samples" ), "samples_specificity_score": partial(specificity_score, average="samples"), + "samples_npv_score": partial(npv_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, } @@ -285,6 +292,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_jaccard_score", "samples_tpr_fpr_tnr_fnr_scores", "samples_specificity_score", + "samples_npv_score", "coverage_error", "unnormalized_multilabel_confusion_matrix_sample", "label_ranking_loss", @@ -314,8 +322,10 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f0.5_score", "tpr_fpr_tnr_fnr_scores", "specificity_score", + "npv_score", "binary_tpr_fpr_tnr_fnr_scores", "binary_specificity_score", + "binary_npv_score", # curves "roc_curve", "precision_recall_curve", @@ -335,7 +345,9 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f2_score", "f0.5_score", "jaccard_score", + "tpr_fpr_tnr_fnr_scores", "specificity_score", + "npv_score", } # Threshold-based metrics with an "average" argument @@ -363,6 +375,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_average_precision_score", "tpr_fpr_tnr_fnr_scores", "specificity_score", + "npv_score", } # Metrics with a "labels" argument @@ -382,6 +395,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "jaccard_score", "tpr_fpr_tnr_fnr_scores", "specificity_score", + "npv_score", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", @@ -390,6 +404,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_jaccard_score", "weighted_tpr_fpr_tnr_fnr_scores", "weighted_specificity_score", + "weighted_npv_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", @@ -398,6 +413,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_jaccard_score", 
"micro_tpr_fpr_tnr_fnr_scores", "micro_specificity_score", + "micro_npv_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", @@ -406,6 +422,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "macro_jaccard_score", "macro_tpr_fpr_tnr_fnr_scores", "macro_specificity_score", + "macro_npv_score", "unnormalized_multilabel_confusion_matrix", "unnormalized_multilabel_confusion_matrix_sample", "cohen_kappa_score", @@ -453,6 +470,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_jaccard_score", "weighted_tpr_fpr_tnr_fnr_scores", "weighted_specificity_score", + "weighted_npv_score", "macro_f0.5_score", "macro_f1_score", "macro_f2_score", @@ -461,6 +479,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "macro_jaccard_score", "macro_tpr_fpr_tnr_fnr_scores", "macro_specificity_score", + "macro_npv_score", "micro_f0.5_score", "micro_f1_score", "micro_f2_score", @@ -469,6 +488,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_jaccard_score", "micro_tpr_fpr_tnr_fnr_scores", "micro_specificity_score", + "micro_npv_score", "unnormalized_multilabel_confusion_matrix", "samples_f0.5_score", "samples_f1_score", @@ -478,6 +498,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_jaccard_score", "samples_tpr_fpr_tnr_fnr_scores", "samples_specificity_score", + "samples_npv_score", } # Regression metrics with "multioutput-continuous" format support @@ -504,7 +525,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "jaccard_score", "samples_jaccard_score", "f1_score", - "micro_f1_score", "macro_f1_score", "weighted_recall_score", # P = R = F = accuracy in multiclass case @@ -515,6 +535,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_recall_score", "micro_tpr_fpr_tnr_fnr_scores", "micro_specificity_score", + "micro_npv_score", "matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error", @@ -544,6 +565,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f0.5_score", "tpr_fpr_tnr_fnr_scores", "specificity_score", + "npv_score", "weighted_f0.5_score", "weighted_f1_score", "weighted_f2_score", @@ -551,6 +573,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_jaccard_score", "weighted_tpr_fpr_tnr_fnr_scores", "weighted_specificity_score", + "weighted_npv_score", "unnormalized_multilabel_confusion_matrix", "macro_f0.5_score", "macro_f2_score", @@ -558,6 +581,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "macro_recall_score", "macro_tpr_fpr_tnr_fnr_scores", "macro_specificity_score", + "macro_npv_score", "log_loss", "hinge_loss", "mean_gamma_deviance", From 29b682c97718708d348c350ac1e0e843ee9b3457 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 09:25:56 +0200 Subject: [PATCH 073/127] Fix linting --- sklearn/tests/test_multiclass.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 933880c8027cb..0262fbda29129 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -369,9 +369,7 @@ def test_ovr_multilabel_dataset(): assert_almost_equal( specificity_score(Y_test, Y_pred, average="micro"), specificity, decimal=2 ) - assert_almost_equal( - npv_score(Y_test, Y_pred, average="micro"), npv, decimal=2 - ) + assert_almost_equal(npv_score(Y_test, Y_pred, average="micro"), npv, decimal=2) def test_ovr_multilabel_predict_proba(): From 
079196254f65d9c6efc8ec49b4cfe9a85e999b02 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 09:51:05 +0200 Subject: [PATCH 074/127] Fix test_common.py --- sklearn/metrics/tests/test_common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 88897602aa915..1176ed9042257 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -345,7 +345,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f2_score", "f0.5_score", "jaccard_score", - "tpr_fpr_tnr_fnr_scores", "specificity_score", "npv_score", } From d04a4e29002c219b9b054678e6d3271969443f1d Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 11:16:47 +0200 Subject: [PATCH 075/127] Add `npv_score` --- sklearn/metrics/tests/test_classification.py | 70 +++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 431b8c258eeb5..3e8ede2f1fe37 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -41,6 +41,7 @@ from sklearn.metrics import recall_score from sklearn.metrics import tpr_fpr_tnr_fnr_scores from sklearn.metrics import specificity_score +from sklearn.metrics import npv_score from sklearn.metrics import zero_one_loss from sklearn.metrics import brier_score_loss from sklearn.metrics import multilabel_confusion_matrix @@ -351,7 +352,7 @@ def test_precision_recall_f_ignored_labels(): def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): - # Test TPR, FPR, TNR, FNR Score for binary classification task + # Test TPR, FPR, TNR, FNR scores for binary classification task y_true, y_pred, _ = make_prediction(binary=True) # compute scores with default labels introspection @@ -382,7 +383,7 @@ def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): def test_tpr_fpr_tnr_fnr_scores_multiclass(): - # Test TPR, FPR, TNR, FNR Score for multiclass classification task + # Test TPR, FPR, TNR, FNR scores for multiclass classification task y_true, y_pred, _ = make_prediction(binary=False) # compute scores with default labels introspection @@ -2213,6 +2214,71 @@ def test_specificity_warnings(zero_division): pytest.warns(Warning if zero_division == "warn" else None) +def test_npv_score_binary_averaged(): + # Test NPV score for binary classification task + y_true, y_pred, _ = make_prediction(binary=True) + + # compute scores with default labels + npv_none = npv_score(y_true, y_pred, average=None) + assert_array_almost_equal(npv_none, [0.85, 0.73], 2) + + npv_macro = npv_score(y_true, y_pred, average="macro") + assert npv_macro == np.mean(npv_none) + + npw_weighted = npv_score(y_true, y_pred, average="weighted") + support = np.bincount(y_true) + assert npw_weighted == np.average(npv_none, weights=support) + + +def test_npv_score_multiclass(): + # Test NPV score for multiclass classification task + y_true, y_pred, _ = make_prediction(binary=False) + + # compute scores with default labels + assert_array_almost_equal( + npv_score(y_true, y_pred, average=None), [0.9, 0.58, 0.94], 2 + ) + + # averaging tests + assert_array_almost_equal(npv_score(y_true, y_pred, average="micro"), 0.77, 2) + + assert_array_almost_equal(npv_score(y_true, y_pred, average="macro"), 0.81, 2) + + assert_array_almost_equal(npv_score(y_true, y_pred, average="weighted"), 0.78, 2) + + with pytest.raises(ValueError): + npv_score(y_true, y_pred, average="samples") 
+ + # same prediction but with and explicit label ordering + assert_array_almost_equal( + npv_score(y_true, y_pred, labels=[0, 2, 1], average=None), [0.9, 0.94, 0.58], 2 + ) + + +@pytest.mark.parametrize("zero_division", ["warn", 0, 1]) +def test_npv_warnings(zero_division): + assert_no_warnings( + npv_score, + np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), + average="micro", + zero_division=zero_division, + ) + + npv_score( + np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), + average="micro", + zero_division=zero_division, + ) + if zero_division == "warn": + pytest.warns(Warning if zero_division == "warn" else None) + + npv_score([1, 1], [1, 1]) + if zero_division == "warn": + pytest.warns(Warning if zero_division == "warn" else None) + + def test_prf_average_binary_data_non_binary(): # Error if user does not explicitly set non-binary average mode y_true_mc = [1, 2, 3, 3] From 3b5853c617cafe931498afdfcdf7bdcedfd8e80c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 15:09:58 +0200 Subject: [PATCH 076/127] Update docs --- sklearn/metrics/_classification.py | 62 ++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 1da48ed0a416d..8f4664d0ce284 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2757,8 +2757,15 @@ def tpr_fpr_tnr_fnr_scores( See Also -------- - classification_report, precision_recall_fscore_support, precision_score, - recall_score, balanced_accuracy_score, multilabel_confusion_matrix + classification_report - a text report showing the key classification metrics + precision_recall_fscore_support - the key classification metrics + precision_score - precision or positive predictive value (PPV) + recall_score - recall, sensitivity, hit rate, or true positive rate (TPR) + specificity_score - specificity, selectivity or true negative rate (TNR) + multilabel_confusion_matrix - confusion matrices for each class or sample + balanced_accuracy_score - accuracy metric for imbalanced datasets + tpr_fpr_tnr_fnr_scores - four basic (mis-)classification rates + npv_score - negative predictive value (NPV) References ---------- @@ -2891,10 +2898,12 @@ def specificity_score( Parameters ---------- - y_true : 1d array-like, or label indicator array / sparse matrix + y_true : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Ground truth (correct) target values. - y_pred : 1d array-like, or label indicator array / sparse matrix + y_pred : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Estimated targets as returned by a classifier. labels : array-like, default=None @@ -2949,13 +2958,19 @@ def specificity_score( specificity : float or ndarray of shape (n_unique_labels,), dtype=np.float64 The specificity of the positive class in binary classification or weighted average of the specificity of each class for the multiclass - task. + task. Scalar is returned if averaging (i.e., when `average` is not `None`), + array - otherwise. 
See Also -------- - classification_report, precision_recall_fscore_support, precision_score, - recall_score, balanced_accuracy_score, multilabel_confusion_matrix, - tpr_fpr_tnr_fnr_scores + classification_report - a text report showing the key classification metrics + precision_recall_fscore_support - the key classification metrics + precision_score - precision or positive predictive value (PPV) + recall_score - recall, sensitivity, hit rate, or true positive rate (TPR) + multilabel_confusion_matrix - confusion matrices for each class or sample + balanced_accuracy_score - accuracy metric for imbalanced datasets + tpr_fpr_tnr_fnr_scores - four basic (mis-)classification rates + npv_score - negative predictive value (NPV) Notes ----- @@ -3014,18 +3029,21 @@ def npv_score( .. versionadded:: 1.1 The NPV is the ratio `TN / (TN + FN)` where `TN` is the number of true - negatives and `FN` is the number of false negatives. - The NPV is intuitively the ability of the classifier to mark the negative - samples correctly. + negatives and `FN` is the number of false negatives. The NPV is intuitively + the ability of the classifier to mark the negative samples correctly. The best value is 1 and the worst value is 0. Parameters ---------- - y_true : 1d array-like, or label indicator array / sparse matrix + y_true : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Ground truth (correct) target values. - y_pred : 1d array-like, or label indicator array / sparse matrix + + y_pred : {array-like, label indicator array, sparse matrix} \ + of shape (n_samples,) Estimated targets as returned by a classifier. + labels : array-like, default=None The set of labels to include when `average != "binary"`, and their order if `average is None`. Labels present in the data can be @@ -3034,16 +3052,19 @@ def npv_score( result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in `y_true` and `y_pred` are used in sorted order. + pos_label : str or int, default=1 The class to report if `average="binary"` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. + average : str, {None, "binary", "micro", "macro", "samples", "weighted"} \ default="binary" This parameter is required for multiclass/multilabel targets. If `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: + `"binary"`: Only report results for the class specified by `pos_label`. This is applicable only if targets (`y_{true,pred}`) are binary. @@ -3062,8 +3083,10 @@ def npv_score( Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). + sample_weight : array-like of shape (n_samples,), default=None Sample weights. + zero_division : "warn", 0 or 1, default="warn" Sets the value to return when there is a zero division. If set to "warn", this acts as 0, but warnings are also raised. @@ -3073,12 +3096,19 @@ def npv_score( NPV : float or ndarray of shape (n_unique_labels,), dtype=np.float64 The negative predictive value of the positive class in binary classification or weighted average of the NPV of each class for - the multiclass task. + the multiclass task. Scalar is returned if averaging (i.e., when + `average` is not `None`), array - otherwise. 
See Also -------- - precision_score, classification_report, precision_recall_fscore_support, - recall_score, balanced_accuracy_score, multilabel_confusion_matrix + classification_report - a text report showing the key classification metrics + precision_recall_fscore_support - the key classification metrics + precision_score - precision or positive predictive value (PPV) + recall_score - recall, sensitivity, hit rate, or true positive rate (TPR) + specificity_score - specificity, selectivity or true negative rate (TNR) + multilabel_confusion_matrix - confusion matrices for each class or sample + balanced_accuracy_score - accuracy metric for imbalanced datasets + tpr_fpr_tnr_fnr_scores - four basic (mis-)classification rates Notes ----- From 18361ea5acf821bcb10797c416be8ccbbf8b5a6e Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 16:26:47 +0200 Subject: [PATCH 077/127] Update model_evaluation.rst --- doc/modules/model_evaluation.rst | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index f39b29eb8ea86..8cef76dba2ed6 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -72,6 +72,8 @@ Scoring Function 'precision' etc. :func:`metrics.precision_score` suffixes apply as with 'f1' 'recall' etc. :func:`metrics.recall_score` suffixes apply as with 'f1' 'jaccard' etc. :func:`metrics.jaccard_score` suffixes apply as with 'f1' +'specificity' etc. :func:`metrics.specificity_score` suffixes apply as with 'f1' +'npv' etc. :func:`metrics.npv_score` suffixes apply as with 'f1' 'roc_auc' :func:`metrics.roc_auc_score` 'roc_auc_ovr' :func:`metrics.roc_auc_score` 'roc_auc_ovo' :func:`metrics.roc_auc_score` @@ -338,6 +340,9 @@ Some also work in the multilabel case: precision_recall_fscore_support precision_score recall_score + specificity_score + npv_score + fpr_tpr_fnr_tnr_scores roc_auc_score zero_one_loss @@ -793,6 +798,7 @@ score: precision_recall_fscore_support precision_score recall_score + fpr_tpr_fnr_tnr_scores Note that the :func:`precision_recall_curve` function is restricted to the binary case. The :func:`average_precision_score` function works only in @@ -806,6 +812,18 @@ precision-recall curve as follows. :scale: 75 :align: center +Precision can also be referred to as the `positive predictive value (PPV) +`_, +e.g. in the context of bioscience. A closely related metric is +`negative predictive value (NPV) `_ +, implemented by the :func:`npv_score`. + +Recall can also be called the hit rate, or true positive rate (TPR). Especially +in biostatistics, it is also known as `sensitivity `_ +, which is related to `specificity `_. +In turn, specificity is also referred to as selectivity, or true negative rate (TNR), +and is implemented by the :func:`specificity_score`. + .. 
topic:: Examples: * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` @@ -850,10 +868,10 @@ following table: +-------------------+------------------------------------------------+ | | Actual class (observation) | +-------------------+---------------------+--------------------------+ -| Predicted class | tp (true positive) | fp (false positive) | +| Predicted class | TP (true positive) | FP (false positive) | | (expectation) | Correct result | Unexpected result | | +---------------------+--------------------------+ -| | fn (false negative) | tn (true negative) | +| | FN (false negative) | TN (true negative) | | | Missing result | Correct absence of result| +-------------------+---------------------+--------------------------+ From 797c69f4d47e6955f07c52afc7c7865fb37e0787 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 16:33:16 +0200 Subject: [PATCH 078/127] Update _classification.py --- sklearn/metrics/_classification.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8f4664d0ce284..851ea1c085c58 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2683,6 +2683,8 @@ def tpr_fpr_tnr_fnr_scores( and false negative rate if `average` is one of `"micro"`, `"macro"`, `"weighted"` or `"samples"`. + Read more in the :ref:`User Guide `. + Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ @@ -2896,6 +2898,8 @@ def specificity_score( The best value is 1 and the worst value is 0. + Read more in the :ref:`User Guide `. + Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ @@ -3034,6 +3038,8 @@ def npv_score( The best value is 1 and the worst value is 0. + Read more in the :ref:`User Guide `. + Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ From d1701fb01c632e1250db79dfaa9c70cf4f9256ed Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 16:59:33 +0200 Subject: [PATCH 079/127] Fix docs --- sklearn/metrics/_classification.py | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 851ea1c085c58..304374b0b1f07 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2759,15 +2759,15 @@ def tpr_fpr_tnr_fnr_scores( See Also -------- - classification_report - a text report showing the key classification metrics - precision_recall_fscore_support - the key classification metrics - precision_score - precision or positive predictive value (PPV) - recall_score - recall, sensitivity, hit rate, or true positive rate (TPR) - specificity_score - specificity, selectivity or true negative rate (TNR) - multilabel_confusion_matrix - confusion matrices for each class or sample - balanced_accuracy_score - accuracy metric for imbalanced datasets - tpr_fpr_tnr_fnr_scores - four basic (mis-)classification rates - npv_score - negative predictive value (NPV) + classification_report : A text report showing the key classification metrics. + precision_recall_fscore_support : The key classification metrics. + precision_score : Precision or positive predictive value (PPV). + recall_score : Recall, sensitivity, hit rate, or true positive rate (TPR). + specificity_score : Specificity, selectivity or true negative rate (TNR). 
+ multilabel_confusion_matrix : Confusion matrices for each class or sample. + balanced_accuracy_score : Accuracy metric for imbalanced datasets. + tpr_fpr_tnr_fnr_scores : Four basic (mis-)classification rates. + npv_score : Negative predictive value (NPV). References ---------- @@ -2967,14 +2967,14 @@ def specificity_score( See Also -------- - classification_report - a text report showing the key classification metrics - precision_recall_fscore_support - the key classification metrics - precision_score - precision or positive predictive value (PPV) - recall_score - recall, sensitivity, hit rate, or true positive rate (TPR) - multilabel_confusion_matrix - confusion matrices for each class or sample - balanced_accuracy_score - accuracy metric for imbalanced datasets - tpr_fpr_tnr_fnr_scores - four basic (mis-)classification rates - npv_score - negative predictive value (NPV) + classification_report : A text report showing the key classification metrics. + precision_recall_fscore_support : The key classification metrics. + precision_score : Precision or positive predictive value (PPV). + recall_score : Recall, sensitivity, hit rate, or true positive rate (TPR). + multilabel_confusion_matrix : Confusion matrices for each class or sample. + balanced_accuracy_score : Accuracy metric for imbalanced datasets. + tpr_fpr_tnr_fnr_scores : Four basic (mis-)classification rates. + npv_score : Negative predictive value (NPV). Notes ----- @@ -3107,14 +3107,14 @@ def npv_score( See Also -------- - classification_report - a text report showing the key classification metrics - precision_recall_fscore_support - the key classification metrics - precision_score - precision or positive predictive value (PPV) - recall_score - recall, sensitivity, hit rate, or true positive rate (TPR) - specificity_score - specificity, selectivity or true negative rate (TNR) - multilabel_confusion_matrix - confusion matrices for each class or sample - balanced_accuracy_score - accuracy metric for imbalanced datasets - tpr_fpr_tnr_fnr_scores - four basic (mis-)classification rates + classification_report : A text report showing the key classification metrics. + precision_recall_fscore_support : The key classification metrics. + precision_score : Precision or positive predictive value (PPV). + recall_score : Recall, sensitivity, hit rate, or true positive rate (TPR). + specificity_score : Specificity, selectivity or true negative rate (TNR). + multilabel_confusion_matrix : Confusion matrices for each class or sample. + balanced_accuracy_score : Accuracy metric for imbalanced datasets. + tpr_fpr_tnr_fnr_scores : Four basic (mis-)classification rates. Notes ----- From dc2602f394b526159f2280dda4120336228f697b Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 17:22:11 +0200 Subject: [PATCH 080/127] Fix model_evaluation.rst --- doc/modules/model_evaluation.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 8cef76dba2ed6..e001f4dfe028c 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -798,7 +798,6 @@ score: precision_recall_fscore_support precision_score recall_score - fpr_tpr_fnr_tnr_scores Note that the :func:`precision_recall_curve` function is restricted to the binary case. 
The :func:`average_precision_score` function works only in From e61eb88886ae035051173efe70e4f86af2e8f2ce Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 18 Sep 2021 18:22:48 +0200 Subject: [PATCH 081/127] Fix docs --- doc/modules/model_evaluation.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index e001f4dfe028c..a3871c79a04d3 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -342,7 +342,6 @@ Some also work in the multilabel case: recall_score specificity_score npv_score - fpr_tpr_fnr_tnr_scores roc_auc_score zero_one_loss From d253c5863b17e160161049131530bddfc82d46a9 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 20 Jan 2022 15:15:08 +0100 Subject: [PATCH 082/127] Update _classification.py Implement review suggestions. --- sklearn/metrics/_classification.py | 37 ++++++++++-------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 7ffc8e95b82f2..c8c39b7794311 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2664,8 +2664,6 @@ def tpr_fpr_tnr_fnr_scores( ): """Compute the TPR, FPR, TNR, FNR for each class. - .. versionadded:: 1.1 - The true positive rate (TPR) is the ratio `TP / (TP + FN)` where `TP` is the number of true positives and `FN` the number of false negatives. @@ -2685,6 +2683,8 @@ def tpr_fpr_tnr_fnr_scores( Read more in the :ref:`User Guide `. + .. versionadded:: 1.1 + Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ @@ -2710,8 +2710,8 @@ def tpr_fpr_tnr_fnr_scores( setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. - average : str, {None, "binary", "micro", "macro", "samples", "weighted"}, \ - default=None + average : {"binary", "micro", "macro", "samples", "weighted"} or None, \ + default=None If `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -2811,7 +2811,6 @@ def tpr_fpr_tnr_fnr_scores( labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) - # Calculate tp_sum, fp_sum, tn_sum, fn_sum, pos_sum, neg_sum samplewise = average == "samples" MCM = multilabel_confusion_matrix( y_true, @@ -2869,7 +2868,7 @@ def tpr_fpr_tnr_fnr_scores( weights = None if average is not None: - assert average != "binary" or len(fpr) == 1 + assert average != "binary" or len(fpr) == 1, "Non-binary target." tpr = np.average(tpr, weights=weights) fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) @@ -2889,8 +2888,6 @@ def specificity_score( ): """Compute the specificity, also known as the true negative rate (TNR). - .. versionadded:: 1.1 - The specificity is the ratio `TN / (TN + FP)` where `TN` is the number of true negatives and `FP` is the number of false positives. The specificity is intuitively the ability of the classifier to find @@ -2900,6 +2897,8 @@ def specificity_score( Read more in the :ref:`User Guide `. + .. versionadded:: 1.1 + Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ @@ -2925,7 +2924,7 @@ def specificity_score( setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. 
- average : str, {None, "binary", "micro", "macro", "samples", "weighted"} \ + average : {"binary", "micro", "macro", "samples", "weighted"} or None \ default="binary" This parameter is required for multiclass/multilabel targets. If `None`, the scores for each class are returned. Otherwise, this @@ -2967,14 +2966,8 @@ def specificity_score( See Also -------- - classification_report : A text report showing the key classification metrics. - precision_recall_fscore_support : The key classification metrics. precision_score : Precision or positive predictive value (PPV). recall_score : Recall, sensitivity, hit rate, or true positive rate (TPR). - multilabel_confusion_matrix : Confusion matrices for each class or sample. - balanced_accuracy_score : Accuracy metric for imbalanced datasets. - tpr_fpr_tnr_fnr_scores : Four basic (mis-)classification rates. - npv_score : Negative predictive value (NPV). Notes ----- @@ -3030,8 +3023,6 @@ def npv_score( ): """Compute the negative predictive value (NPV). - .. versionadded:: 1.1 - The NPV is the ratio `TN / (TN + FN)` where `TN` is the number of true negatives and `FN` is the number of false negatives. The NPV is intuitively the ability of the classifier to mark the negative samples correctly. @@ -3040,6 +3031,8 @@ def npv_score( Read more in the :ref:`User Guide `. + .. versionadded:: 1.1 + Parameters ---------- y_true : {array-like, label indicator array, sparse matrix} \ @@ -3065,7 +3058,7 @@ def npv_score( setting `labels=[pos_label]` and `average != "binary"` will report scores for that label only. - average : str, {None, "binary", "micro", "macro", "samples", "weighted"} \ + average : {"binary", "micro", "macro", "samples", "weighted"}, None \ default="binary" This parameter is required for multiclass/multilabel targets. If `None`, the scores for each class are returned. Otherwise, this @@ -3107,14 +3100,8 @@ def npv_score( See Also -------- - classification_report : A text report showing the key classification metrics. - precision_recall_fscore_support : The key classification metrics. precision_score : Precision or positive predictive value (PPV). recall_score : Recall, sensitivity, hit rate, or true positive rate (TPR). - specificity_score : Specificity, selectivity or true negative rate (TNR). - multilabel_confusion_matrix : Confusion matrices for each class or sample. - balanced_accuracy_score : Accuracy metric for imbalanced datasets. - tpr_fpr_tnr_fnr_scores : Four basic (mis-)classification rates. Notes ----- @@ -3189,7 +3176,7 @@ def npv_score( else: weights = None if average is not None: - assert average != "binary" or len(NPV) == 1 + assert average != "binary" or len(NPV) == 1, "Non-binary target." 
NPV = np.average(NPV, weights=weights) return NPV From f38be618341809c97249eb86e73cd5f388cb209d Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 20 Jan 2022 16:32:15 +0100 Subject: [PATCH 083/127] Implement review Implement review --- doc/modules/model_evaluation.rst | 4 +- doc/whats_new/v1.1.rst | 2 +- sklearn/metrics/__init__.py | 4 +- sklearn/metrics/_classification.py | 14 +++--- sklearn/metrics/tests/test_classification.py | 36 +++++++------- sklearn/metrics/tests/test_common.py | 50 ++++++++++---------- sklearn/metrics/tests/test_score_objects.py | 1 + 7 files changed, 56 insertions(+), 55 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 14bf34b3ec21a..e50e29ad0dff1 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -813,14 +813,14 @@ precision-recall curve as follows. Precision can also be referred to as the `positive predictive value (PPV) `_, -e.g. in the context of bioscience. A closely related metric is +e.g. in the context of bioscience. A closely related metric is `negative predictive value (NPV) `_ , implemented by the :func:`npv_score`. Recall can also be called the hit rate, or true positive rate (TPR). Especially in biostatistics, it is also known as `sensitivity `_ , which is related to `specificity `_. -In turn, specificity is also referred to as selectivity, or true negative rate (TNR), +In turn, specificity is also referred to as selectivity, or true negative rate (TNR), and is implemented by the :func:`specificity_score`. .. topic:: Examples: diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index df484ca704032..7e0d8e07db40f 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -377,7 +377,7 @@ Changelog - |Fix| :func:`metrics.silhouette_score` now supports integer input for precomputed distances. :pr:`22108` by `Thomas Fan`_. - - |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_scores`, + - |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_score`, :func:`metrics.specificity_score` and :func:`metrics.npv_score`. :pr:`19556` by :user:`Hao Chun Chang ` and :user:`Pawel Kranzberg `. diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 0e08b70c95175..b4ccb89028553 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -35,7 +35,7 @@ from ._classification import zero_one_loss from ._classification import brier_score_loss from ._classification import multilabel_confusion_matrix -from ._classification import tpr_fpr_tnr_fnr_scores +from ._classification import tpr_fpr_tnr_fnr_score from ._classification import specificity_score from ._classification import npv_score @@ -178,7 +178,7 @@ "silhouette_score", "specificity_score", "top_k_accuracy_score", - "tpr_fpr_tnr_fnr_scores", + "tpr_fpr_tnr_fnr_score", "v_measure_score", "zero_one_loss", "brier_score_loss", diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8ae6b232d5ec6..399520af599bc 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2690,7 +2690,7 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): return np.average((y_true - y_prob) ** 2, weights=sample_weight) -def tpr_fpr_tnr_fnr_scores( +def tpr_fpr_tnr_fnr_score( y_true, y_pred, *, @@ -2805,7 +2805,7 @@ def tpr_fpr_tnr_fnr_scores( specificity_score : Specificity, selectivity or true negative rate (TNR). multilabel_confusion_matrix : Confusion matrices for each class or sample. 
balanced_accuracy_score : Accuracy metric for imbalanced datasets. - tpr_fpr_tnr_fnr_scores : Four basic (mis-)classification rates. + tpr_fpr_tnr_fnr_score : Four basic (mis-)classification rates. npv_score : Negative predictive value (NPV). References @@ -2823,17 +2823,17 @@ def tpr_fpr_tnr_fnr_scores( >>> import numpy as np >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) - >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='macro') + >>> tpr_fpr_tnr_fnr_score(y_true, y_pred, average='macro') (0.33..., 0.33..., 0.66..., 0.66...) - >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='micro') + >>> tpr_fpr_tnr_fnr_score(y_true, y_pred, average='micro') (0.33..., 0.33..., 0.66..., 0.66...) - >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average='weighted') + >>> tpr_fpr_tnr_fnr_score(y_true, y_pred, average='weighted') (0.33..., 0.33..., 0.66..., 0.66...) It is possible to compute per-label FPR, FNR, TNR, TPR and supports instead of averaging: - >>> tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None, + >>> tpr_fpr_tnr_fnr_score(y_true, y_pred, average=None, ... labels=['pig', 'dog', 'cat']) (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) @@ -3038,7 +3038,7 @@ def specificity_score( >>> specificity_score(y_true, y_pred, average=None, zero_division=1) array([1. , 0.66..., 0.83...]) """ - _, _, tnr, _ = tpr_fpr_tnr_fnr_scores( + _, _, tnr, _ = tpr_fpr_tnr_fnr_score( y_true, y_pred, labels=labels, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index f8b9ef3c00966..c766082200f9c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -39,7 +39,7 @@ from sklearn.metrics import precision_recall_fscore_support from sklearn.metrics import precision_score from sklearn.metrics import recall_score -from sklearn.metrics import tpr_fpr_tnr_fnr_scores +from sklearn.metrics import tpr_fpr_tnr_fnr_score from sklearn.metrics import specificity_score from sklearn.metrics import npv_score from sklearn.metrics import zero_one_loss @@ -351,12 +351,12 @@ def test_precision_recall_f_ignored_labels(): assert recall_13(average=average) != recall_all(average=average) -def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): +def test_tpr_fpr_tnr_fnr_score_binary_averaged(): # Test TPR, FPR, TNR, FNR scores for binary classification task y_true, y_pred, _ = make_prediction(binary=True) # compute scores with default labels introspection - tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_score(y_true, y_pred, average=None) assert_array_almost_equal(tprs, [0.88, 0.68], 2) assert_array_almost_equal(fprs, [0.32, 0.12], 2) assert_array_almost_equal(tnrs, [0.68, 0.88], 2) @@ -368,13 +368,13 @@ def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): assert_array_almost_equal(tn / (tn + fp), 0.88, 2) assert_array_almost_equal(fn / (tp + fn), 0.32, 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="macro") + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="macro") assert tpr == np.mean(tprs) assert fpr == np.mean(fprs) assert tnr == np.mean(tnrs) assert fnr == np.mean(fnrs) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="weighted") + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="weighted") support = np.bincount(y_true) assert tpr == 
np.average(tprs, weights=support) assert fpr == np.average(fprs, weights=support) @@ -382,41 +382,41 @@ def test_tpr_fpr_tnr_fnr_scores_binary_averaged(): assert fnr == np.average(fnrs, weights=support) -def test_tpr_fpr_tnr_fnr_scores_multiclass(): +def test_tpr_fpr_tnr_fnr_score_multiclass(): # Test TPR, FPR, TNR, FNR scores for multiclass classification task y_true, y_pred, _ = make_prediction(binary=False) # compute scores with default labels introspection - tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average=None) + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_score(y_true, y_pred, average=None) assert_array_almost_equal(tprs, [0.79, 0.1, 0.9], 2) assert_array_almost_equal(fprs, [0.08, 0.14, 0.45], 2) assert_array_almost_equal(tnrs, [0.92, 0.86, 0.55], 2) assert_array_almost_equal(fnrs, [0.21, 0.9, 0.1], 2) # averaging tests - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="micro") + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="micro") assert_array_almost_equal(tpr, 0.53, 2) assert_array_almost_equal(fpr, 0.23, 2) assert_array_almost_equal(tnr, 0.77, 2) assert_array_almost_equal(fnr, 0.47, 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="macro") + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="macro") assert_array_almost_equal(tpr, 0.6, 2) assert_array_almost_equal(fpr, 0.22, 2) assert_array_almost_equal(tnr, 0.78, 2) assert_array_almost_equal(fnr, 0.4, 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="weighted") + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="weighted") assert_array_almost_equal(tpr, 0.53, 2) assert_array_almost_equal(fpr, 0.2, 2) assert_array_almost_equal(tnr, 0.8, 2) assert_array_almost_equal(fnr, 0.47, 2) with pytest.raises(ValueError): - tpr_fpr_tnr_fnr_scores(y_true, y_pred, average="samples") + tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") # same prediction but with and explicit label ordering - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, labels=[0, 2, 1], average=None ) assert_array_almost_equal(tpr, [0.79, 0.9, 0.1], 2) @@ -426,14 +426,14 @@ def test_tpr_fpr_tnr_fnr_scores_multiclass(): @pytest.mark.parametrize("zero_division", ["warn", 0, 1]) -def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): +def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) pytest.warns(Warning if zero_division == "warn" else None) zero_division_value = 1.0 if zero_division == 1.0 else 0.0 - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average=None, zero_division=zero_division ) assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division_value], 2) @@ -441,7 +441,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division_value], 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="macro", zero_division=zero_division ) assert_almost_equal(tpr, 0.625 if zero_division_value else 0.375) @@ -449,7 +449,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 0.91666, 5) assert_almost_equal(fnr, 
0.625 if zero_division_value else 0.375) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="micro", zero_division=zero_division ) assert_almost_equal(tpr, 0.5) @@ -457,7 +457,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 0.875) assert_almost_equal(fnr, 0.5) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="weighted", zero_division=zero_division ) assert_almost_equal(tpr, 0.5) @@ -465,7 +465,7 @@ def test_tpr_fpr_tnr_fnr_scores_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 1.0) assert_almost_equal(fnr, 0.5) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_scores( + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="samples", diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 36ab3a46d6781..0b94f77248f54 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -59,7 +59,7 @@ from sklearn.metrics import roc_auc_score from sklearn.metrics import roc_curve from sklearn.metrics import specificity_score -from sklearn.metrics import tpr_fpr_tnr_fnr_scores +from sklearn.metrics import tpr_fpr_tnr_fnr_score from sklearn.metrics import zero_one_loss from sklearn.metrics import ndcg_score from sklearn.metrics import dcg_score @@ -146,8 +146,8 @@ "f2_score": partial(fbeta_score, beta=2), "f0.5_score": partial(fbeta_score, beta=0.5), "matthews_corrcoef_score": matthews_corrcoef, - "tpr_fpr_tnr_fnr_scores": tpr_fpr_tnr_fnr_scores, - "binary_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="binary"), + "tpr_fpr_tnr_fnr_score": tpr_fpr_tnr_fnr_score, + "binary_tpr_fpr_tnr_fnr_score": partial(tpr_fpr_tnr_fnr_score, average="binary"), "specificity_score": specificity_score, "binary_specificity_score": partial(specificity_score, average="binary"), "npv_score": npv_score, @@ -158,8 +158,8 @@ "weighted_precision_score": partial(precision_score, average="weighted"), "weighted_recall_score": partial(recall_score, average="weighted"), "weighted_jaccard_score": partial(jaccard_score, average="weighted"), - "weighted_tpr_fpr_tnr_fnr_scores": partial( - tpr_fpr_tnr_fnr_scores, average="weighted" + "weighted_tpr_fpr_tnr_fnr_score": partial( + tpr_fpr_tnr_fnr_score, average="weighted" ), "weighted_specificity_score": partial(specificity_score, average="weighted"), "weighted_npv_score": partial(npv_score, average="weighted"), @@ -169,7 +169,7 @@ "micro_precision_score": partial(precision_score, average="micro"), "micro_recall_score": partial(recall_score, average="micro"), "micro_jaccard_score": partial(jaccard_score, average="micro"), - "micro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="micro"), + "micro_tpr_fpr_tnr_fnr_score": partial(tpr_fpr_tnr_fnr_score, average="micro"), "micro_specificity_score": partial(specificity_score, average="micro"), "micro_npv_score": partial(npv_score, average="micro"), "macro_f0.5_score": partial(fbeta_score, average="macro", beta=0.5), @@ -178,7 +178,7 @@ "macro_precision_score": partial(precision_score, average="macro"), "macro_recall_score": partial(recall_score, average="macro"), "macro_jaccard_score": partial(jaccard_score, average="macro"), - "macro_tpr_fpr_tnr_fnr_scores": partial(tpr_fpr_tnr_fnr_scores, average="macro"), + "macro_tpr_fpr_tnr_fnr_score": partial(tpr_fpr_tnr_fnr_score, average="macro"), "macro_specificity_score": 
partial(specificity_score, average="macro"), "macro_npv_score": partial(npv_score, average="macro"), "samples_f0.5_score": partial(fbeta_score, average="samples", beta=0.5), @@ -187,8 +187,8 @@ "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), "samples_jaccard_score": partial(jaccard_score, average="samples"), - "samples_tpr_fpr_tnr_fnr_scores": partial( - tpr_fpr_tnr_fnr_scores, average="samples" + "samples_tpr_fpr_tnr_fnr_score": partial( + tpr_fpr_tnr_fnr_score, average="samples" ), "samples_specificity_score": partial(specificity_score, average="samples"), "samples_npv_score": partial(npv_score, average="samples"), @@ -290,7 +290,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_precision_score", "samples_recall_score", "samples_jaccard_score", - "samples_tpr_fpr_tnr_fnr_scores", + "samples_tpr_fpr_tnr_fnr_score", "samples_specificity_score", "samples_npv_score", "coverage_error", @@ -320,10 +320,10 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f1_score", "f2_score", "f0.5_score", - "tpr_fpr_tnr_fnr_scores", + "tpr_fpr_tnr_fnr_score", "specificity_score", "npv_score", - "binary_tpr_fpr_tnr_fnr_scores", + "binary_tpr_fpr_tnr_fnr_score", "binary_specificity_score", "binary_npv_score", # curves @@ -372,7 +372,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_average_precision_score", "micro_average_precision_score", "samples_average_precision_score", - "tpr_fpr_tnr_fnr_scores", + "tpr_fpr_tnr_fnr_score", "specificity_score", "npv_score", } @@ -392,7 +392,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "f2_score", "f0.5_score", "jaccard_score", - "tpr_fpr_tnr_fnr_scores", + "tpr_fpr_tnr_fnr_score", "specificity_score", "npv_score", "weighted_f0.5_score", @@ -401,7 +401,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", - "weighted_tpr_fpr_tnr_fnr_scores", + "weighted_tpr_fpr_tnr_fnr_score", "weighted_specificity_score", "weighted_npv_score", "micro_f0.5_score", @@ -410,7 +410,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_precision_score", "micro_recall_score", "micro_jaccard_score", - "micro_tpr_fpr_tnr_fnr_scores", + "micro_tpr_fpr_tnr_fnr_score", "micro_specificity_score", "micro_npv_score", "macro_f0.5_score", @@ -419,7 +419,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "macro_precision_score", "macro_recall_score", "macro_jaccard_score", - "macro_tpr_fpr_tnr_fnr_scores", + "macro_tpr_fpr_tnr_fnr_score", "macro_specificity_score", "macro_npv_score", "unnormalized_multilabel_confusion_matrix", @@ -467,7 +467,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_precision_score", "weighted_recall_score", "weighted_jaccard_score", - "weighted_tpr_fpr_tnr_fnr_scores", + "weighted_tpr_fpr_tnr_fnr_score", "weighted_specificity_score", "weighted_npv_score", "macro_f0.5_score", @@ -476,7 +476,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "macro_precision_score", "macro_recall_score", "macro_jaccard_score", - "macro_tpr_fpr_tnr_fnr_scores", + "macro_tpr_fpr_tnr_fnr_score", "macro_specificity_score", "macro_npv_score", "micro_f0.5_score", @@ -485,7 +485,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_precision_score", "micro_recall_score", "micro_jaccard_score", - "micro_tpr_fpr_tnr_fnr_scores", + 
"micro_tpr_fpr_tnr_fnr_score", "micro_specificity_score", "micro_npv_score", "unnormalized_multilabel_confusion_matrix", @@ -495,7 +495,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "samples_precision_score", "samples_recall_score", "samples_jaccard_score", - "samples_tpr_fpr_tnr_fnr_scores", + "samples_tpr_fpr_tnr_fnr_score", "samples_specificity_score", "samples_npv_score", } @@ -532,7 +532,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "micro_f2_score", "micro_precision_score", "micro_recall_score", - "micro_tpr_fpr_tnr_fnr_scores", + "micro_tpr_fpr_tnr_fnr_score", "micro_specificity_score", "micro_npv_score", "matthews_corrcoef_score", @@ -562,7 +562,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "recall_score", "f2_score", "f0.5_score", - "tpr_fpr_tnr_fnr_scores", + "tpr_fpr_tnr_fnr_score", "specificity_score", "npv_score", "weighted_f0.5_score", @@ -570,7 +570,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "weighted_f2_score", "weighted_precision_score", "weighted_jaccard_score", - "weighted_tpr_fpr_tnr_fnr_scores", + "weighted_tpr_fpr_tnr_fnr_score", "weighted_specificity_score", "weighted_npv_score", "unnormalized_multilabel_confusion_matrix", @@ -578,7 +578,7 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs): "macro_f2_score", "macro_precision_score", "macro_recall_score", - "macro_tpr_fpr_tnr_fnr_scores", + "macro_tpr_fpr_tnr_fnr_score", "macro_specificity_score", "macro_npv_score", "log_loss", diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 0bb64572202f1..b4bf353869798 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -31,6 +31,7 @@ recall_score, roc_auc_score, top_k_accuracy_score, + matthews_corrcoef, specificity_score, npv_score, ) From ba5f5ea5981c972928b50b28d3b17646c4efd957 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 20 Jan 2022 16:37:45 +0100 Subject: [PATCH 084/127] Fix linting Fix linting --- sklearn/metrics/tests/test_common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 0b94f77248f54..c778c5b4401b1 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -187,9 +187,7 @@ "samples_precision_score": partial(precision_score, average="samples"), "samples_recall_score": partial(recall_score, average="samples"), "samples_jaccard_score": partial(jaccard_score, average="samples"), - "samples_tpr_fpr_tnr_fnr_score": partial( - tpr_fpr_tnr_fnr_score, average="samples" - ), + "samples_tpr_fpr_tnr_fnr_score": partial(tpr_fpr_tnr_fnr_score, average="samples"), "samples_specificity_score": partial(specificity_score, average="samples"), "samples_npv_score": partial(npv_score, average="samples"), "cohen_kappa_score": cohen_kappa_score, From 3199d002e6ebb6025d2f6b5c87fd91f327859865 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 20 Jan 2022 16:58:20 +0100 Subject: [PATCH 085/127] Update v1.1.rst Fix linting --- doc/whats_new/v1.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 7e0d8e07db40f..38865e75b975b 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -377,7 +377,7 @@ Changelog - |Fix| :func:`metrics.silhouette_score` now supports integer input for precomputed distances. :pr:`22108` by `Thomas Fan`_. 
- - |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_score`, +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_score`, :func:`metrics.specificity_score` and :func:`metrics.npv_score`. :pr:`19556` by :user:`Hao Chun Chang ` and :user:`Pawel Kranzberg `. From 244920aa74369777717fcfe4f3933f0288141b36 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 20 Jan 2022 17:08:38 +0100 Subject: [PATCH 086/127] Update _classification.py Fix docs --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 399520af599bc..476e99bbc92a4 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2805,7 +2805,6 @@ def tpr_fpr_tnr_fnr_score( specificity_score : Specificity, selectivity or true negative rate (TNR). multilabel_confusion_matrix : Confusion matrices for each class or sample. balanced_accuracy_score : Accuracy metric for imbalanced datasets. - tpr_fpr_tnr_fnr_score : Four basic (mis-)classification rates. npv_score : Negative predictive value (NPV). References @@ -2821,6 +2820,7 @@ def tpr_fpr_tnr_fnr_score( Examples -------- >>> import numpy as np + >>> from sklearn.metrics import tpr_fpr_tnr_fnr_score >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> tpr_fpr_tnr_fnr_score(y_true, y_pred, average='macro') From 4e9a3eed25c89a72e15fdafff12bbfe79b840272 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 20 Jan 2022 17:38:29 +0100 Subject: [PATCH 087/127] Update _classification.py Fix docs --- sklearn/metrics/_classification.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 476e99bbc92a4..4f2d702c1f43e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2807,6 +2807,14 @@ def tpr_fpr_tnr_fnr_score( balanced_accuracy_score : Accuracy metric for imbalanced datasets. npv_score : Negative predictive value (NPV). + Notes + ----- + When `true positive + false negative == 0`, TPR, FNR are undefined; + When `true negative + false positive == 0`, FPR, TNR are undefined. + In such cases, by default the metric will be set to 0, as will F-score, + and `UndefinedMetricWarning` will be raised. This behavior can be + modified with `zero_division`. + References ---------- .. [1] `Wikipedia entry for confusion matrix @@ -2837,14 +2845,6 @@ def tpr_fpr_tnr_fnr_score( ... labels=['pig', 'dog', 'cat']) (array([0., 0., 1.]), array([0.25, 0.5 , 0.25]), array([0.75, 0.5 , 0.75]), array([1., 1., 0.])) - - Notes - ----- - When `true positive + false negative == 0`, TPR, FNR are undefined; - When `true negative + false positive == 0`, FPR, TNR are undefined. - In such cases, by default the metric will be set to 0, as will F-score, - and `UndefinedMetricWarning` will be raised. This behavior can be - modified with `zero_division`. """ _check_zero_division(zero_division) From 93c52932db0132d7998233a02133eac81ee7799f Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Fri, 21 Jan 2022 09:02:18 +0100 Subject: [PATCH 088/127] Update _classification.py Fix docs. 
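Reviewer note: the Returns entries added below spell out the four ratios in terms of binary confusion-matrix counts. As a quick sanity check, the same quantities can be reproduced with the existing `confusion_matrix`; the snippet is an illustrative sketch only (plain NumPy on a binary toy example, not the new scorer itself, which is expected to agree for `average="binary"`):

    import numpy as np
    from sklearn.metrics import confusion_matrix

    y_true = np.array([0, 1, 1, 0, 1, 0])
    y_pred = np.array([0, 1, 0, 0, 1, 1])

    # For binary input, ravel() yields tn, fp, fn, tp in that order.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    tpr = tp / (tp + fn)  # 2 / 3, recall / sensitivity
    fpr = fp / (fp + tn)  # 1 / 3
    tnr = tn / (tn + fp)  # 2 / 3, specificity
    fnr = fn / (fn + tp)  # 1 / 3
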
--- sklearn/metrics/_classification.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 4f2d702c1f43e..c4b971f7eece9 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2789,12 +2789,20 @@ def tpr_fpr_tnr_fnr_score( Returns ------- tpr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + The true positive rate (TPR) is the ratio `TP / (TP + FN)` where `TP` + is the number of true positives and `FN` the number of false negatives. fpr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + The false positive rate (FPR) is the ratio `FP / (TN + FP)` where `TN` + is the number of true negatives and `FP` the number of false positives. tnr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + The true negative rate (TNR) is the ratio `TN / (TN + FP)` where `TN` + is the number of true negatives and `FP` the number of false positives. fnr : float or ndarray of shape (n_unique_labels,), dtype=np.float64 + The false negative rate (FNR) is the ratio `FN / (TP + FN)` where `TP` + is the number of true positives and `FN` the number of false negatives. See Also -------- From 1b5611a54dc637fc35e999e5f9de52737f1252cb Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 22 Jan 2022 13:18:17 +0100 Subject: [PATCH 089/127] Update the tpr_fpr_tnr_fnr_score doc and specificity test --- sklearn/metrics/_classification.py | 2 +- sklearn/metrics/tests/test_classification.py | 39 ++++++++++++++------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index c4b971f7eece9..8d7386cfe39e4 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2819,7 +2819,7 @@ def tpr_fpr_tnr_fnr_score( ----- When `true positive + false negative == 0`, TPR, FNR are undefined; When `true negative + false positive == 0`, FPR, TNR are undefined. - In such cases, by default the metric will be set to 0, as will F-score, + In such cases, by default the metric will be set to 0, and `UndefinedMetricWarning` will be raised. This behavior can be modified with `zero_division`. diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index c766082200f9c..d73fec6fce55c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2199,19 +2199,34 @@ def test_specificity_warnings(zero_division): average="micro", zero_division=zero_division, ) + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter("always") + specificity_score( + np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), + average="micro", + zero_division=zero_division, + ) + if zero_division == "warn": + assert ( + str(record.pop().message) + == "TNR is ill-defined and " + "being set to 0.0 due to no negatives." + "Use `zero_division` parameter to control" + " this behavior." 
+ ) + else: + assert len(record) == 0 - specificity_score( - np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), - average="micro", - zero_division=zero_division, - ) - if zero_division == "warn": - pytest.warns(Warning if zero_division == "warn" else None) - - specificity_score([1, 1], [1, 1]) - if zero_division == "warn": - pytest.warns(Warning if zero_division == "warn" else None) + specificity_score([1, 1], [1, 1]) + if zero_division == "warn": + assert ( + str(record.pop().message) + == "TNR is ill-defined and " + "being set to 0.0 due to no negatives." + "Use `zero_division` parameter to control" + " this behavior." + ) def test_npv_score_binary_averaged(): From 21065f40f2602cb7e20cebc273fab84f17c65a1c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 22 Jan 2022 14:10:27 +0100 Subject: [PATCH 090/127] Update test_classification.py Fix test_specificity_warnings --- sklearn/metrics/tests/test_classification.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index d73fec6fce55c..64ca923562195 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2210,8 +2210,8 @@ def test_specificity_warnings(zero_division): if zero_division == "warn": assert ( str(record.pop().message) - == "TNR is ill-defined and " - "being set to 0.0 due to no negatives." + == "Tnr is ill-defined and " + "being set to 0.0 due to no negatives samples." "Use `zero_division` parameter to control" " this behavior." ) @@ -2222,11 +2222,13 @@ def test_specificity_warnings(zero_division): if zero_division == "warn": assert ( str(record.pop().message) - == "TNR is ill-defined and " - "being set to 0.0 due to no negatives." + == "Tnr is ill-defined and " + "being set to 0.0 due to no negatives samples." "Use `zero_division` parameter to control" " this behavior." ) + else: + assert len(record) == 0 def test_npv_score_binary_averaged(): From 2c621a6b0d408a1c5651121af277d4066dcb9220 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 22 Jan 2022 21:47:51 +0100 Subject: [PATCH 091/127] Update test_classification.py Fix test_specificity_warnings --- sklearn/metrics/tests/test_classification.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 64ca923562195..a76886ca4f7cd 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2212,7 +2212,7 @@ def test_specificity_warnings(zero_division): str(record.pop().message) == "Tnr is ill-defined and " "being set to 0.0 due to no negatives samples." - "Use `zero_division` parameter to control" + " Use `zero_division` parameter to control" " this behavior." ) else: @@ -2224,11 +2224,9 @@ def test_specificity_warnings(zero_division): str(record.pop().message) == "Tnr is ill-defined and " "being set to 0.0 due to no negatives samples." - "Use `zero_division` parameter to control" + " Use `zero_division` parameter to control" " this behavior." ) - else: - assert len(record) == 0 def test_npv_score_binary_averaged(): From 4ff05d1bda17025e1a36fb6f45c9f75b4203d7b5 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 24 Jan 2022 19:45:37 +0100 Subject: [PATCH 092/127] Update test_classification.py Add tests for subsets of and too many of labels. 
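Reviewer note: the `labels` semantics exercised by the new tests below mirror the established precision/recall API: a subset of labels restricts scoring (and averaging) to those classes, extra labels not present in the data contribute zero-count entries, and invalid labels raise a ValueError. Since TPR is recall, the released `recall_score` already reproduces the TPR column on the test's toy data; this sketch uses only existing API and is meant as a cross-check, not as the new function:

    from sklearn.metrics import recall_score

    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]

    # Score only labels 1 and 3; label 2 is ignored entirely.
    recall_score(y_true, y_pred, labels=[1, 3], average=None)
    # -> array([0.5, 1. ]), the TPR column asserted in
    #    test_tpr_fpr_tnr_fnr_score_ignored_labels

    recall_score(y_true, y_pred, labels=[1, 3], average="macro")
    # -> 0.75, matching the first element of the macro-averaged tuple
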
--- sklearn/metrics/tests/test_classification.py | 107 ++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a76886ca4f7cd..006a291ccb118 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -382,6 +382,109 @@ def test_tpr_fpr_tnr_fnr_score_binary_averaged(): assert fnr == np.average(fnrs, weights=support) +@ignore_warnings +def test_tpr_fpr_tnr_fnr_score_binary_single_class(): + # Test how the scores behave with a single positive or + # negative class + # Such a case may occur with non-stratified cross-validation + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_score([1, 1], [1, 1]) + assert 1.0 == tprs[0] + assert 0.0 == fprs[0] + assert 0.0 == tnrs[0] + assert 0.0 == fnrs[0] + + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_score([-1, -1], [-1, -1]) + assert 1.0 == tprs[0] + assert 0.0 == fprs[0] + assert 0.0 == tnrs[0] + assert 0.0 == fnrs[0] + + +@ignore_warnings +def test_tpr_fpr_tnr_fnr_score_extra_labels(): + # Test handling of explicit additional (not in input) labels + y_true = [1, 3, 3, 2] + y_pred = [1, 1, 3, 2] + y_true_bin = label_binarize(y_true, classes=np.arange(5)) + y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) + data = [(y_true, y_pred), (y_true_bin, y_pred_bin)] + + for i, (y_true, y_pred) in enumerate(data): + # No averaging + tprs, fprs, tnrs, fnrs = tpr_fpr_tnr_fnr_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average=None + ) + assert_array_almost_equal(tprs, [0.0, 1.0, 1.0, 0.5, 0.0], 2) + assert_array_almost_equal(fprs, [0.0, 0.33, 0.0, 0.0, 0.0], 2) + assert_array_almost_equal(tnrs, [1.0, 0.67, 1.0, 1.0, 1.0], 2) + assert_array_almost_equal(fnrs, [0.0, 0.0, 0.0, 0.5, 0.0], 2) + + # Macro average + scores = tpr_fpr_tnr_fnr_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average="macro" + ) + assert_array_almost_equal(scores, [0.5, 0.07, 0.93, 0.1], 2) + + # Micro average + scores = tpr_fpr_tnr_fnr_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average="micro" + ) + assert_array_almost_equal(scores, [0.75, 0.0625, 0.9375, 0.25], 4) + + # Further tests + for average in ["macro", "micro", "weighted", "samples"]: + if average in ["micro", "samples"] and i == 0: + continue + assert_almost_equal( + tpr_fpr_tnr_fnr_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average=average + ), + tpr_fpr_tnr_fnr_score(y_true, y_pred, labels=None, average=average), + ) + + # Error when introducing invalid label in multilabel case + for average in [None, "macro", "micro", "samples"]: + with pytest.raises(ValueError): + tpr_fpr_tnr_fnr_score( + y_true_bin, y_pred_bin, labels=np.arange(6), average=average + ) + with pytest.raises(ValueError): + tpr_fpr_tnr_fnr_score( + y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average + ) + + +@ignore_warnings +def test_tpr_fpr_tnr_fnr_score_ignored_labels(): + # Test handling of a subset of labels + y_true = [1, 1, 2, 3] + y_pred = [1, 3, 3, 3] + y_true_bin = label_binarize(y_true, classes=np.arange(5)) + y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) + data = [(y_true, y_pred), (y_true_bin, y_pred_bin)] + + for i, (y_true, y_pred) in enumerate(data): + scores_13 = partial(tpr_fpr_tnr_fnr_score, y_true, y_pred, labels=[1, 3]) + scores_all = partial(tpr_fpr_tnr_fnr_score, y_true, y_pred, labels=None) + + assert_array_almost_equal( + ([0.5, 1.0], [0.0, 0.67], [1.0, 0.33], [0.5, 0.0]), + scores_13(average=None), + 2 + ) + 
assert_almost_equal( + [0.75, 0.33, 0.67, 0.25], scores_13(average="macro"), 2 + ) + assert_almost_equal([0.67, 0.4, 0.6, 0.33], scores_13(average="micro"), 2) + assert_almost_equal( + [0.67, 0.22, 0.78, 0.33], scores_13(average="weighted"), 2 + ) + + # ensure the above were meaningful tests: + for average in ["macro", "weighted", "micro"]: + assert scores_13(average=average) != scores_all(average=average) + + def test_tpr_fpr_tnr_fnr_score_multiclass(): # Test TPR, FPR, TNR, FNR scores for multiclass classification task y_true, y_pred, _ = make_prediction(binary=False) @@ -415,7 +518,7 @@ def test_tpr_fpr_tnr_fnr_score_multiclass(): with pytest.raises(ValueError): tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") - # same prediction but with and explicit label ordering + # same prediction but with explicit label ordering tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, labels=[0, 2, 1], average=None ) @@ -2264,7 +2367,7 @@ def test_npv_score_multiclass(): with pytest.raises(ValueError): npv_score(y_true, y_pred, average="samples") - # same prediction but with and explicit label ordering + # same prediction but with explicit label ordering assert_array_almost_equal( npv_score(y_true, y_pred, labels=[0, 2, 1], average=None), [0.9, 0.94, 0.58], 2 ) From 264731eba7db12f3e1686f69af4bb50094ea5d54 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 24 Jan 2022 19:50:52 +0100 Subject: [PATCH 093/127] Update test_classification.py fix linting --- sklearn/metrics/tests/test_classification.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 006a291ccb118..417bf7e043306 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -470,15 +470,11 @@ def test_tpr_fpr_tnr_fnr_score_ignored_labels(): assert_array_almost_equal( ([0.5, 1.0], [0.0, 0.67], [1.0, 0.33], [0.5, 0.0]), scores_13(average=None), - 2 - ) - assert_almost_equal( - [0.75, 0.33, 0.67, 0.25], scores_13(average="macro"), 2 + 2, ) + assert_almost_equal([0.75, 0.33, 0.67, 0.25], scores_13(average="macro"), 2) assert_almost_equal([0.67, 0.4, 0.6, 0.33], scores_13(average="micro"), 2) - assert_almost_equal( - [0.67, 0.22, 0.78, 0.33], scores_13(average="weighted"), 2 - ) + assert_almost_equal([0.67, 0.22, 0.78, 0.33], scores_13(average="weighted"), 2) # ensure the above were meaningful tests: for average in ["macro", "weighted", "micro"]: From 1806ad0572bfe558d8d5ff531a3f49417ce0d677 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Mon, 24 Jan 2022 20:17:01 +0100 Subject: [PATCH 094/127] Update test_classification.py Fix test_tpr_fpr_tnr_fnr_score_extra_labels() --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 417bf7e043306..e0f06cfd68e9e 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -433,7 +433,7 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): # Further tests for average in ["macro", "micro", "weighted", "samples"]: - if average in ["micro", "samples"] and i == 0: + if average in ["macro", "micro", "samples"] and i == 0: continue assert_almost_equal( tpr_fpr_tnr_fnr_score( From 955a28667513e5ac8485080f95ea0da947766603 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 08:59:18 +0100 Subject: 
[PATCH 095/127] Update test_classification.py Moar tests. --- sklearn/metrics/tests/test_classification.py | 147 ++++++++++++++++--- 1 file changed, 123 insertions(+), 24 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e0f06cfd68e9e..2686010bef2f5 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -402,7 +402,8 @@ def test_tpr_fpr_tnr_fnr_score_binary_single_class(): @ignore_warnings def test_tpr_fpr_tnr_fnr_score_extra_labels(): - # Test handling of explicit additional (not in input) labels + # Test TPR, FPR, TNR, FNR handling of explicit additional (not in input) + # labels y_true = [1, 3, 3, 2] y_pred = [1, 1, 3, 2] y_true_bin = label_binarize(y_true, classes=np.arange(5)) @@ -456,7 +457,7 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): @ignore_warnings def test_tpr_fpr_tnr_fnr_score_ignored_labels(): - # Test handling of a subset of labels + # Test TPR, FPR, TNR, FNR handling of a subset of labels y_true = [1, 1, 2, 3] y_pred = [1, 3, 3, 3] y_true_bin = label_binarize(y_true, classes=np.arange(5)) @@ -2318,17 +2319,16 @@ def test_specificity_warnings(zero_division): assert len(record) == 0 specificity_score([1, 1], [1, 1]) - if zero_division == "warn": - assert ( - str(record.pop().message) - == "Tnr is ill-defined and " - "being set to 0.0 due to no negatives samples." - " Use `zero_division` parameter to control" - " this behavior." - ) + assert ( + str(record.pop().message) + == "Tnr is ill-defined and " + "being set to 0.0 due to no negatives samples." + " Use `zero_division` parameter to control" + " this behavior." + ) -def test_npv_score_binary_averaged(): +def test_npv_binary_averaged(): # Test NPV score for binary classification task y_true, y_pred, _ = make_prediction(binary=True) @@ -2344,7 +2344,92 @@ def test_npv_score_binary_averaged(): assert npw_weighted == np.average(npv_none, weights=support) -def test_npv_score_multiclass(): +@ignore_warnings +def test_npv_binary_single_class(): + # Test how the NPV score behaves with a single positive or + # negative class + # Such a case may occur with non-stratified cross-validation + assert 0.0 == npv_score([1, 1], [1, 1]) + assert 1.0 == npv_score([-1, -1], [-1, -1]) + + +@ignore_warnings +def test_npv_extra_labels(): + # Test NPV handling of explicit additional (not in input) labels + y_true = [1, 3, 3, 2] + y_pred = [1, 1, 3, 2] + y_true_bin = label_binarize(y_true, classes=np.arange(5)) + y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) + data = [(y_true, y_pred), (y_true_bin, y_pred_bin)] + + for i, (y_true, y_pred) in enumerate(data): + print(i) + # No averaging + npvs = npv_score(y_true, y_pred, labels=[0, 1, 2, 3, 4], average=None) + assert_array_almost_equal(npvs, [1.0, 1.0, 1.0, 0.67, 1.0], 2) + + # Macro average + npv = npv_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average="macro" + ) + assert_almost_equal(npv, 0.93, 2) + + # Micro average + npv = npv_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average="micro" + ) + assert_almost_equal(npv, 0.9375, 4) + + # Further tests + for average in ["macro", "micro", "weighted", "samples"]: + print(average) + if average in ["macro", "micro", "samples"] and i == 0: + continue + assert_almost_equal( + npv_score( + y_true, y_pred, labels=[0, 1, 2, 3, 4], average=average + ), + npv_score(y_true, y_pred, labels=None, average=average), + ) + + # Error when introducing invalid label in multilabel case + for average in [None, 
"macro", "micro", "samples"]: + with pytest.raises(ValueError): + npv_score( + y_true_bin, y_pred_bin, labels=np.arange(6), average=average + ) + with pytest.raises(ValueError): + npv_score( + y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average + ) + + +@ignore_warnings +def test_npv_ignored_labels(): + # Test NPV handling of a subset of labels + y_true = [1, 1, 2, 3] + y_pred = [1, 3, 3, 3] + y_true_bin = label_binarize(y_true, classes=np.arange(5)) + y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) + data = [(y_true, y_pred), (y_true_bin, y_pred_bin)] + + for i, (y_true, y_pred) in enumerate(data): + npv_13 = partial(npv_score, y_true, y_pred, labels=[1, 3]) + npv_all = partial(npv_score, y_true, y_pred, labels=None) + + assert_almost_equal([0.67, 1.0], npv_13(average=None), 2) + assert_almost_equal(0.83, npv_13(average="macro"), 2) + assert_almost_equal(0.75, npv_13(average="micro"), 2) + assert_almost_equal(0.78, npv_13(average="weighted"), 2) + + # ensure the above were meaningful tests: + for average in ["macro", "weighted", "micro"]: + if average == "micro" and i == 0: + continue + assert npv_13(average=average) != npv_all(average=average) + + +def test_npv_multiclass(): # Test NPV score for multiclass classification task y_true, y_pred, _ = make_prediction(binary=False) @@ -2378,19 +2463,33 @@ def test_npv_warnings(zero_division): average="micro", zero_division=zero_division, ) + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter("always") + npv_score( + np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), + average="micro", + zero_division=zero_division, + ) + if zero_division == "warn": + assert ( + str(record.pop().message) + == "Npv is ill-defined and " + "being set to 0.0 due to no negative call samples." + " Use `zero_division` parameter to control" + " this behavior." + ) + else: + assert len(record) == 0 - npv_score( - np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), - average="micro", - zero_division=zero_division, - ) - if zero_division == "warn": - pytest.warns(Warning if zero_division == "warn" else None) - - npv_score([1, 1], [1, 1]) - if zero_division == "warn": - pytest.warns(Warning if zero_division == "warn" else None) + npv_score([1, 1], [1, 1]) + assert ( + str(record.pop().message) + == "Npv is ill-defined and " + "being set to 0.0 due to no negative call samples." + " Use `zero_division` parameter to control" + " this behavior." + ) def test_prf_average_binary_data_non_binary(): From d3798f5d296da5f575e86762dbca881af8577af1 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 09:08:09 +0100 Subject: [PATCH 096/127] Update test_classification.py Fix linting. 
--- sklearn/metrics/tests/test_classification.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 2686010bef2f5..0401764420e08 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2369,15 +2369,11 @@ def test_npv_extra_labels(): assert_array_almost_equal(npvs, [1.0, 1.0, 1.0, 0.67, 1.0], 2) # Macro average - npv = npv_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average="macro" - ) + npv = npv_score(y_true, y_pred, labels=[0, 1, 2, 3, 4], average="macro") assert_almost_equal(npv, 0.93, 2) # Micro average - npv = npv_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average="micro" - ) + npv = npv_score(y_true, y_pred, labels=[0, 1, 2, 3, 4], average="micro") assert_almost_equal(npv, 0.9375, 4) # Further tests From 90f2bb05c1398681837eb1e235e9dcadd0a483d8 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 09:56:46 +0100 Subject: [PATCH 097/127] Update test_classification.py Fix linting. --- sklearn/metrics/tests/test_classification.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0401764420e08..f1042d2259efd 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2378,26 +2378,19 @@ def test_npv_extra_labels(): # Further tests for average in ["macro", "micro", "weighted", "samples"]: - print(average) if average in ["macro", "micro", "samples"] and i == 0: continue assert_almost_equal( - npv_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average=average - ), + npv_score(y_true, y_pred, labels=[0, 1, 2, 3, 4], average=average), npv_score(y_true, y_pred, labels=None, average=average), ) # Error when introducing invalid label in multilabel case for average in [None, "macro", "micro", "samples"]: with pytest.raises(ValueError): - npv_score( - y_true_bin, y_pred_bin, labels=np.arange(6), average=average - ) + npv_score(y_true_bin, y_pred_bin, labels=np.arange(6), average=average) with pytest.raises(ValueError): - npv_score( - y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average - ) + npv_score(y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average) @ignore_warnings From cc3dc7a30484ac570268dc04430c8dc8f155ac1f Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 12:47:47 +0100 Subject: [PATCH 098/127] Update test_classification.py Expand test_tpr_fpr_tnr_fnr_score_extra_labels() --- sklearn/metrics/tests/test_classification.py | 35 ++++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index f1042d2259efd..7f7a82d1800cb 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -444,15 +444,22 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): ) # Error when introducing invalid label in multilabel case + msg = "All labels must be in [0, n labels) for multilabel targets." 
for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError): + with pytest.raises(ValueError) as record: tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(6), average=average ) - with pytest.raises(ValueError): + assert len(record) > 0 + for item in record: + assert msg in str(item.message) + with pytest.raises(ValueError) as record: tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average ) + assert len(record) > 0 + for item in record: + assert msg in str(item.message) @ignore_warnings @@ -495,22 +502,22 @@ def test_tpr_fpr_tnr_fnr_score_multiclass(): # averaging tests tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="micro") - assert_array_almost_equal(tpr, 0.53, 2) - assert_array_almost_equal(fpr, 0.23, 2) - assert_array_almost_equal(tnr, 0.77, 2) - assert_array_almost_equal(fnr, 0.47, 2) + assert_almost_equal(tpr, 0.53, 2) + assert_almost_equal(fpr, 0.23, 2) + assert_almost_equal(tnr, 0.77, 2) + assert_almost_equal(fnr, 0.47, 2) tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="macro") - assert_array_almost_equal(tpr, 0.6, 2) - assert_array_almost_equal(fpr, 0.22, 2) - assert_array_almost_equal(tnr, 0.78, 2) - assert_array_almost_equal(fnr, 0.4, 2) + assert_almost_equal(tpr, 0.6, 2) + assert_almost_equal(fpr, 0.22, 2) + assert_almost_equal(tnr, 0.78, 2) + assert_almost_equal(fnr, 0.4, 2) tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred, average="weighted") - assert_array_almost_equal(tpr, 0.53, 2) - assert_array_almost_equal(fpr, 0.2, 2) - assert_array_almost_equal(tnr, 0.8, 2) - assert_array_almost_equal(fnr, 0.47, 2) + assert_almost_equal(tpr, 0.53, 2) + assert_almost_equal(fpr, 0.2, 2) + assert_almost_equal(tnr, 0.8, 2) + assert_almost_equal(fnr, 0.47, 2) with pytest.raises(ValueError): tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") From 3ac784c8fd1bc97b474b930f8f061588aa224a5f Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 20:32:04 +0100 Subject: [PATCH 099/127] Update test_classification.py Improvements of tests. --- sklearn/metrics/tests/test_classification.py | 23 +++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 7f7a82d1800cb..1f5d55c68d4a1 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -444,22 +444,16 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): ) # Error when introducing invalid label in multilabel case - msg = "All labels must be in [0, n labels) for multilabel targets." + err_msg = "All labels must be in [0, n labels) for multilabel targets." 
for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError) as record: + with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(6), average=average ) - assert len(record) > 0 - for item in record: - assert msg in str(item.message) - with pytest.raises(ValueError) as record: + with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average ) - assert len(record) > 0 - for item in record: - assert msg in str(item.message) @ignore_warnings @@ -519,7 +513,8 @@ def test_tpr_fpr_tnr_fnr_score_multiclass(): assert_almost_equal(tnr, 0.8, 2) assert_almost_equal(fnr, 0.47, 2) - with pytest.raises(ValueError): + err_msg = "All labels must be in [0, n labels) for multilabel targets." + with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") # same prediction but with explicit label ordering @@ -2393,10 +2388,11 @@ def test_npv_extra_labels(): ) # Error when introducing invalid label in multilabel case + err_msg = "All labels must be in [0, n labels) for multilabel targets." for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=err_msg): npv_score(y_true_bin, y_pred_bin, labels=np.arange(6), average=average) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=err_msg): npv_score(y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average) @@ -2441,7 +2437,8 @@ def test_npv_multiclass(): assert_array_almost_equal(npv_score(y_true, y_pred, average="weighted"), 0.78, 2) - with pytest.raises(ValueError): + err_msg = "All labels must be in [0, n labels) for multilabel targets." + with pytest.raises(ValueError, match=err_msg): npv_score(y_true, y_pred, average="samples") # same prediction but with explicit label ordering From 0958afbc5a7af563c5501ae7ba7f177c34da523b Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 21:11:16 +0100 Subject: [PATCH 100/127] Update test_classification.py Fix tests. --- sklearn/metrics/tests/test_classification.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1f5d55c68d4a1..96d3a955622a0 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -446,11 +446,11 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): # Error when introducing invalid label in multilabel case err_msg = "All labels must be in [0, n labels) for multilabel targets." for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(ValueError, match=fr"{err_msg}.*"): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(6), average=average ) - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(ValueError, match=fr"{err_msg}.*"): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average ) @@ -513,8 +513,8 @@ def test_tpr_fpr_tnr_fnr_score_multiclass(): assert_almost_equal(tnr, 0.8, 2) assert_almost_equal(fnr, 0.47, 2) - err_msg = "All labels must be in [0, n labels) for multilabel targets." 
- with pytest.raises(ValueError, match=err_msg): + err_msg = "Samplewise metrics are not available outside of multilabel" + with pytest.raises(ValueError, match=fr"{err_msg}.*"): tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") # same prediction but with explicit label ordering @@ -2390,9 +2390,9 @@ def test_npv_extra_labels(): # Error when introducing invalid label in multilabel case err_msg = "All labels must be in [0, n labels) for multilabel targets." for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(ValueError, match=fr"{err_msg}.*"): npv_score(y_true_bin, y_pred_bin, labels=np.arange(6), average=average) - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(ValueError, match=fr"{err_msg}.*"): npv_score(y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average) @@ -2438,7 +2438,7 @@ def test_npv_multiclass(): assert_array_almost_equal(npv_score(y_true, y_pred, average="weighted"), 0.78, 2) err_msg = "All labels must be in [0, n labels) for multilabel targets." - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(ValueError, match=fr"{err_msg}.*"): npv_score(y_true, y_pred, average="samples") # same prediction but with explicit label ordering From 79473c8622446c9349a73994694edb1b1323f0f1 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 25 Jan 2022 21:36:20 +0100 Subject: [PATCH 101/127] Update test_classification.py Fix tests. --- sklearn/metrics/tests/test_classification.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 96d3a955622a0..ad0e411adab5c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -444,13 +444,13 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): ) # Error when introducing invalid label in multilabel case - err_msg = "All labels must be in [0, n labels) for multilabel targets." + err_msg = r"All labels must be in \[0, n labels\) for multilabel targets.*" for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError, match=fr"{err_msg}.*"): + with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(6), average=average ) - with pytest.raises(ValueError, match=fr"{err_msg}.*"): + with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average ) @@ -513,8 +513,8 @@ def test_tpr_fpr_tnr_fnr_score_multiclass(): assert_almost_equal(tnr, 0.8, 2) assert_almost_equal(fnr, 0.47, 2) - err_msg = "Samplewise metrics are not available outside of multilabel" - with pytest.raises(ValueError, match=fr"{err_msg}.*"): + err_msg = r"Samplewise metrics are not available outside of multilabel.*" + with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") # same prediction but with explicit label ordering @@ -2388,11 +2388,11 @@ def test_npv_extra_labels(): ) # Error when introducing invalid label in multilabel case - err_msg = "All labels must be in [0, n labels) for multilabel targets." 
+ err_msg = r"All labels must be in \[0, n labels\) for multilabel targets.*" for average in [None, "macro", "micro", "samples"]: - with pytest.raises(ValueError, match=fr"{err_msg}.*"): + with pytest.raises(ValueError, match=err_msg): npv_score(y_true_bin, y_pred_bin, labels=np.arange(6), average=average) - with pytest.raises(ValueError, match=fr"{err_msg}.*"): + with pytest.raises(ValueError, match=err_msg): npv_score(y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average) @@ -2437,8 +2437,8 @@ def test_npv_multiclass(): assert_array_almost_equal(npv_score(y_true, y_pred, average="weighted"), 0.78, 2) - err_msg = "All labels must be in [0, n labels) for multilabel targets." - with pytest.raises(ValueError, match=fr"{err_msg}.*"): + err_msg = r"Samplewise metrics are not available outside of multilabel.*" + with pytest.raises(ValueError, match=err_msg): npv_score(y_true, y_pred, average="samples") # same prediction but with explicit label ordering From 83e84df4706d412b9c86600eef12a9c67b0f8c88 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 07:55:48 +0100 Subject: [PATCH 102/127] Update test_classification.py Enhance tests. --- sklearn/metrics/tests/test_classification.py | 119 ++++++++++++++----- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ad0e411adab5c..2f624983fbf86 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -444,12 +444,19 @@ def test_tpr_fpr_tnr_fnr_score_extra_labels(): ) # Error when introducing invalid label in multilabel case - err_msg = r"All labels must be in \[0, n labels\) for multilabel targets.*" for average in [None, "macro", "micro", "samples"]: + err_msg = ( + r"All labels must be in \[0, n labels\) for multilabel targets\." + " Got 5 > 4" + ) with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(6), average=average ) + err_msg = ( + r"All labels must be in \[0, n labels\) for multilabel targets\." + " Got -1 < 0" + ) with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score( y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average @@ -513,7 +520,10 @@ def test_tpr_fpr_tnr_fnr_score_multiclass(): assert_almost_equal(tnr, 0.8, 2) assert_almost_equal(fnr, 0.47, 2) - err_msg = r"Samplewise metrics are not available outside of multilabel.*" + err_msg = ( + "Samplewise metrics are not available outside of multilabel" + r" classification\." 
+ ) with pytest.raises(ValueError, match=err_msg): tpr_fpr_tnr_fnr_score(y_true, y_pred, average="samples") @@ -532,48 +542,86 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) - pytest.warns(Warning if zero_division == "warn" else None) + expected_type_warning = UndefinedMetricWarning if zero_division == "warn" else None zero_division_value = 1.0 if zero_division == 1.0 else 0.0 - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( - y_true, y_pred, average=None, zero_division=zero_division - ) + + with pytest.warns(expected_type_warning) as record: + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( + y_true, y_pred, average=None, zero_division=zero_division + ) + if expected_type_warning is None: + assert len(record) == 0 + else: + assert ( + str(record.pop().message) + == "Fnr is ill-defined and " + "being set to 0.0 in labels due to no positives samples." + " Use `zero_division` parameter to control" + " this behavior." + ) assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division_value], 2) assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division_value], 2) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( - y_true, y_pred, average="macro", zero_division=zero_division - ) + with pytest.warns(expected_type_warning) as record: + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( + y_true, y_pred, average="macro", zero_division=zero_division + ) + if expected_type_warning is None: + assert len(record) == 0 + else: + assert ( + str(record.pop().message) + == "Fnr is ill-defined and " + "being set to 0.0 in labels due to no positives samples." + " Use `zero_division` parameter to control" + " this behavior." + ) assert_almost_equal(tpr, 0.625 if zero_division_value else 0.375) assert_almost_equal(fpr, 1 / 3.0 / 4.0) assert_almost_equal(tnr, 0.91666, 5) assert_almost_equal(fnr, 0.625 if zero_division_value else 0.375) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( - y_true, y_pred, average="micro", zero_division=zero_division - ) + with pytest.warns(expected_type_warning) as record: + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( + y_true, y_pred, average="micro", zero_division=zero_division + ) + assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.125) assert_almost_equal(tnr, 0.875) assert_almost_equal(fnr, 0.5) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( - y_true, y_pred, average="weighted", zero_division=zero_division - ) + with pytest.warns(expected_type_warning) as record: + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( + y_true, y_pred, average="weighted", zero_division=zero_division + ) + if expected_type_warning is None: + assert len(record) == 0 + else: + assert ( + str(record.pop().message) + == "Fnr is ill-defined and " + "being set to 0.0 in labels due to no positives samples." + " Use `zero_division` parameter to control" + " this behavior." 
+ ) assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0) assert_almost_equal(tnr, 1.0) assert_almost_equal(fnr, 0.5) - tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( - y_true, - y_pred, - average="samples", - sample_weight=[1, 1, 2], - zero_division=zero_division, - ) + with pytest.warns(expected_type_warning) as record: + tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( + y_true, + y_pred, + average="samples", + sample_weight=[1, 1, 2], + zero_division=zero_division, + ) + assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.08333, 5) assert_almost_equal(tnr, 0.91666, 5) @@ -2388,10 +2436,17 @@ def test_npv_extra_labels(): ) # Error when introducing invalid label in multilabel case - err_msg = r"All labels must be in \[0, n labels\) for multilabel targets.*" for average in [None, "macro", "micro", "samples"]: + err_msg = ( + r"All labels must be in \[0, n labels\) for multilabel targets\." + " Got 5 > 4" + ) with pytest.raises(ValueError, match=err_msg): npv_score(y_true_bin, y_pred_bin, labels=np.arange(6), average=average) + err_msg = ( + r"All labels must be in \[0, n labels\) for multilabel targets\." + " Got -1 < 0" + ) with pytest.raises(ValueError, match=err_msg): npv_score(y_true_bin, y_pred_bin, labels=np.arange(-1, 4), average=average) @@ -2437,7 +2492,10 @@ def test_npv_multiclass(): assert_array_almost_equal(npv_score(y_true, y_pred, average="weighted"), 0.78, 2) - err_msg = r"Samplewise metrics are not available outside of multilabel.*" + err_msg = ( + "Samplewise metrics are not available outside of multilabel" + r" classification\." + ) with pytest.raises(ValueError, match=err_msg): npv_score(y_true, y_pred, average="samples") @@ -2456,15 +2514,19 @@ def test_npv_warnings(zero_division): average="micro", zero_division=zero_division, ) - with warnings.catch_warnings(record=True) as record: - warnings.simplefilter("always") + + expected_type_warning = UndefinedMetricWarning if zero_division == "warn" else None + + with pytest.warns(expected_type_warning) as record: npv_score( np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average="micro", zero_division=zero_division, ) - if zero_division == "warn": + if expected_type_warning is None: + assert len(record) == 0 + else: assert ( str(record.pop().message) == "Npv is ill-defined and " @@ -2472,9 +2534,8 @@ def test_npv_warnings(zero_division): " Use `zero_division` parameter to control" " this behavior." ) - else: - assert len(record) == 0 + with pytest.warns(UndefinedMetricWarning) as record: npv_score([1, 1], [1, 1]) assert ( str(record.pop().message) From bdf7920e7826afcdc9a7baa2ba5dc9963276996f Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 10:59:35 +0100 Subject: [PATCH 103/127] Update test_classification.py Fix tests. --- sklearn/metrics/tests/test_classification.py | 25 +++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 2f624983fbf86..60a3888380284 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -555,8 +555,8 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): else: assert ( str(record.pop().message) - == "Fnr is ill-defined and " - "being set to 0.0 in labels due to no positives samples." + == "Tpr is ill-defined and " + "being set to 0.0 in labels with no positives samples." " Use `zero_division` parameter to control" " this behavior." 
) @@ -2515,6 +2515,10 @@ def test_npv_warnings(zero_division): zero_division=zero_division, ) + msg = ( + "Npv is ill-defined and being set to 0.0 due to no negative call samples." + " Use `zero_division` parameter to control this behavior." + ) expected_type_warning = UndefinedMetricWarning if zero_division == "warn" else None with pytest.warns(expected_type_warning) as record: @@ -2527,23 +2531,10 @@ def test_npv_warnings(zero_division): if expected_type_warning is None: assert len(record) == 0 else: - assert ( - str(record.pop().message) - == "Npv is ill-defined and " - "being set to 0.0 due to no negative call samples." - " Use `zero_division` parameter to control" - " this behavior." - ) + assert str(record[-1].message == msg - with pytest.warns(UndefinedMetricWarning) as record: + with pytest.warns(UndefinedMetricWarning, match=msg): npv_score([1, 1], [1, 1]) - assert ( - str(record.pop().message) - == "Npv is ill-defined and " - "being set to 0.0 due to no negative call samples." - " Use `zero_division` parameter to control" - " this behavior." - ) def test_prf_average_binary_data_non_binary(): From 81573d5939123c66d9e1b2e3a72653fd45403301 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 11:29:03 +0100 Subject: [PATCH 104/127] Update test_classification.py typo fix --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 60a3888380284..1cf5465dc98f5 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2531,7 +2531,7 @@ def test_npv_warnings(zero_division): if expected_type_warning is None: assert len(record) == 0 else: - assert str(record[-1].message == msg + assert str(record[-1].message) == msg with pytest.warns(UndefinedMetricWarning, match=msg): npv_score([1, 1], [1, 1]) From e75c3fa83cd381efa7c7a3ea6c2bc7f01137c181 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 12:15:21 +0100 Subject: [PATCH 105/127] Update test_classification.py Test enhancements. --- sklearn/metrics/tests/test_classification.py | 34 ++++++-------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1cf5465dc98f5..ed11912b5878d 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -542,6 +542,10 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]]) y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) + msg = ( + "Tpr is ill-defined and being set to 0.0 in labels with no positives samples." + " Use `zero_division` parameter to control this behavior." + ) expected_type_warning = UndefinedMetricWarning if zero_division == "warn" else None zero_division_value = 1.0 if zero_division == 1.0 else 0.0 @@ -553,13 +557,7 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): if expected_type_warning is None: assert len(record) == 0 else: - assert ( - str(record.pop().message) - == "Tpr is ill-defined and " - "being set to 0.0 in labels with no positives samples." - " Use `zero_division` parameter to control" - " this behavior." 
- ) + assert str(record.pop().message) == msg assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division_value], 2) assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) @@ -572,23 +570,16 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): if expected_type_warning is None: assert len(record) == 0 else: - assert ( - str(record.pop().message) - == "Fnr is ill-defined and " - "being set to 0.0 in labels due to no positives samples." - " Use `zero_division` parameter to control" - " this behavior." - ) + assert str(record.pop().message) == msg assert_almost_equal(tpr, 0.625 if zero_division_value else 0.375) assert_almost_equal(fpr, 1 / 3.0 / 4.0) assert_almost_equal(tnr, 0.91666, 5) assert_almost_equal(fnr, 0.625 if zero_division_value else 0.375) - with pytest.warns(expected_type_warning) as record: + with pytest.does_not_warn(): tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="micro", zero_division=zero_division ) - assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.125) assert_almost_equal(tnr, 0.875) @@ -601,19 +592,13 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): if expected_type_warning is None: assert len(record) == 0 else: - assert ( - str(record.pop().message) - == "Fnr is ill-defined and " - "being set to 0.0 in labels due to no positives samples." - " Use `zero_division` parameter to control" - " this behavior." - ) + assert str(record.pop().message) == msg assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0) assert_almost_equal(tnr, 1.0) assert_almost_equal(fnr, 0.5) - with pytest.warns(expected_type_warning) as record: + with pytest.does_not_warn(): tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, @@ -621,7 +606,6 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): sample_weight=[1, 1, 2], zero_division=zero_division, ) - assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.08333, 5) assert_almost_equal(tnr, 0.91666, 5) From 1a3068cfe442e412a2f45363a42f48d62957bbd9 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 12:39:40 +0100 Subject: [PATCH 106/127] Update test_classification.py Downgrade test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction() --- sklearn/metrics/tests/test_classification.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ed11912b5878d..949d6579d44b6 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -576,10 +576,11 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 0.91666, 5) assert_almost_equal(fnr, 0.625 if zero_division_value else 0.375) - with pytest.does_not_warn(): + with pytest.warns(None) as record: tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="micro", zero_division=zero_division ) + assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.125) assert_almost_equal(tnr, 0.875) @@ -598,7 +599,7 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 1.0) assert_almost_equal(fnr, 0.5) - with pytest.does_not_warn(): + with pytest.warns(None) as record: tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, @@ -606,6 +607,7 @@ def 
test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): sample_weight=[1, 1, 2], zero_division=zero_division, ) + assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.08333, 5) assert_almost_equal(tnr, 0.91666, 5) From e4c8cc85abe1f40902402671a5c3656c767d5624 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 16:31:40 +0100 Subject: [PATCH 107/127] Update model_evaluation.rst Expand and reorder docs. --- doc/modules/model_evaluation.rst | 173 +++++++++++++++++++------------ 1 file changed, 104 insertions(+), 69 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index e50e29ad0dff1..ab82b7d820734 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -399,6 +399,109 @@ array of class labels, multilabel data is specified as an indicator matrix, in which cell ``[i, j]`` has value 1 if sample ``i`` has label ``j`` and value 0 otherwise. +.. _confusion_matrix: + +Confusion matrix +---------------- + +The :func:`confusion_matrix` function evaluates +classification accuracy by computing the `confusion matrix +`_ with each row corresponding +to the true class (Wikipedia and other references may use different convention +for axes). + +By definition, entry :math:`i, j` in a confusion matrix is +the number of observations actually in group :math:`i`, but +predicted to be in group :math:`j`. Here is an example:: + + >>> from sklearn.metrics import confusion_matrix + >>> y_true = [2, 0, 2, 2, 0, 1] + >>> y_pred = [0, 0, 2, 2, 0, 2] + >>> confusion_matrix(y_true, y_pred) + array([[2, 0, 0], + [0, 0, 1], + [1, 0, 2]]) + +:class:`ConfusionMatrixDisplay` can be used to visually represent a confusion +matrix as shown in the +:ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` +example, which creates the following figure: + +.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_confusion_matrix_001.png + :target: ../auto_examples/model_selection/plot_confusion_matrix.html + :scale: 75 + :align: center + +The parameter ``normalize`` allows to report ratios instead of counts. The +confusion matrix can be normalized in 3 different ways: ``'pred'``, ``'true'``, +and ``'all'`` which will divide the counts by the sum of each columns, rows, or +the entire matrix, respectively. + + >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] + >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] + >>> confusion_matrix(y_true, y_pred, normalize='all') + array([[0.25 , 0.125], + [0.25 , 0.375]]) + +For binary problems, we can get counts of true negatives, false positives, +false negatives and true positives as follows:: + + >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] + >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] + >>> tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() + >>> tn, fp, fn, tp + (2, 1, 2, 3) + +.. topic:: Example: + + * See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` + for an example of using a confusion matrix to evaluate classifier output + quality. + + * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` + for an example of using a confusion matrix to classify + hand-written digits. + + * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` + for an example of using a confusion matrix to classify text + documents. + +.. 
_tpr_fpr_tnr_fnr_score: + +TPR FPR TNR FNR score +--------------------- + +The :func:`tpr_fpr_tnr_fnr_score` function computes the true positive rate (TPR), +false positive rate (FPR), true negative rate (TNR) and false negative rate (FNR) +of predictions, based on the `confusion matrix `_. +The rates are defined as + +.. math:: + + \texttt{TPR}(y, \hat{y}) = \frac{TP}{P}} = \frac{TP}{TP + FN}} = 1 - FNR + + \texttt{FPR}(y, \hat{y}) = \frac{FP}{N}} = \frac{FP}{TN + FP}} = 1 - TNR + + \texttt{TNR}(y, \hat{y}) = \frac{TN}{N}} = \frac{TN}{TN + FP}} = 1 - FPR + + \texttt{FNR}(y, \hat{y}) = \frac{FN}{P}} = \frac{FN}{TP + FN}} = 1 - TPR + + >>> from sklearn.metrics import tpr_fpr_tnr_fnr_score + >>> y_true = [2, 0, 2, 2, 0, 1] + >>> y_pred = [0, 0, 2, 2, 0, 2] + >>> tpr_fpr_tnr_fnr_score(y_true, y_pred) + (array([1. , 0. , 0.66666667]), + array([0.25 , 0. , 0.33333333]), + array([0.75 , 1. , 0.66666667]), + array([0. , 1. , 0.33333333])) + +.. note:: + + * True positive rate (TPR) is also called recall, sensitivity, or hit rate. + * False positive rate (FPR) is also called fall-out. + * True negative rate (TNR) is also called specificity, or selectivity. + * false negative rate (FNR) is also called miss rate. + .. _accuracy_score: Accuracy score @@ -408,7 +511,6 @@ The :func:`accuracy_score` function computes the `accuracy `_, either the fraction (default) or the count (normalize=False) of correct predictions. - In multilabel classification, the function returns the subset accuracy. If the entire set of predicted labels for a sample strictly match with the true set of labels, then the subset accuracy is 1.0; otherwise it is 0.0. @@ -547,7 +649,7 @@ or *informedness*. * Our definition: [Mosley2013]_, [Kelleher2015]_ and [Guyon2015]_, where [Guyon2015]_ adopt the adjusted version to ensure that random predictions - have a score of :math:`0` and perfect predictions have a score of :math:`1`.. + have a score of :math:`0` and perfect predictions have a score of :math:`1`. * Class balanced accuracy as described in [Mosley2013]_: the minimum between the precision and the recall for each class is computed. Those values are then averaged over the total number of classes to get the balanced accuracy. @@ -595,73 +697,6 @@ and not for more than two annotators. >>> cohen_kappa_score(y_true, y_pred) 0.4285714285714286 -.. _confusion_matrix: - -Confusion matrix ----------------- - -The :func:`confusion_matrix` function evaluates -classification accuracy by computing the `confusion matrix -`_ with each row corresponding -to the true class (Wikipedia and other references may use different convention -for axes). - -By definition, entry :math:`i, j` in a confusion matrix is -the number of observations actually in group :math:`i`, but -predicted to be in group :math:`j`. Here is an example:: - - >>> from sklearn.metrics import confusion_matrix - >>> y_true = [2, 0, 2, 2, 0, 1] - >>> y_pred = [0, 0, 2, 2, 0, 2] - >>> confusion_matrix(y_true, y_pred) - array([[2, 0, 0], - [0, 0, 1], - [1, 0, 2]]) - -:class:`ConfusionMatrixDisplay` can be used to visually represent a confusion -matrix as shown in the -:ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` -example, which creates the following figure: - -.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_confusion_matrix_001.png - :target: ../auto_examples/model_selection/plot_confusion_matrix.html - :scale: 75 - :align: center - -The parameter ``normalize`` allows to report ratios instead of counts. 
The -confusion matrix can be normalized in 3 different ways: ``'pred'``, ``'true'``, -and ``'all'`` which will divide the counts by the sum of each columns, rows, or -the entire matrix, respectively. - - >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] - >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] - >>> confusion_matrix(y_true, y_pred, normalize='all') - array([[0.25 , 0.125], - [0.25 , 0.375]]) - -For binary problems, we can get counts of true negatives, false positives, -false negatives and true positives as follows:: - - >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] - >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] - >>> tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() - >>> tn, fp, fn, tp - (2, 1, 2, 3) - -.. topic:: Example: - - * See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` - for an example of using a confusion matrix to evaluate classifier output - quality. - - * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` - for an example of using a confusion matrix to classify - hand-written digits. - - * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - for an example of using a confusion matrix to classify text - documents. - .. _classification_report: Classification report From cfb6a626f4f85f3755e5894c165e0066722984da Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Wed, 26 Jan 2022 18:21:54 +0100 Subject: [PATCH 108/127] Update model_evaluation.rst Expand docs. --- doc/modules/model_evaluation.rst | 70 +++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index ab82b7d820734..1aa174cb48bbe 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -478,13 +478,13 @@ The rates are defined as .. math:: - \texttt{TPR}(y, \hat{y}) = \frac{TP}{P}} = \frac{TP}{TP + FN}} = 1 - FNR + \texttt{TPR} = \frac{TP}{P}} = \frac{TP}{TP + FN}} = 1 - FNR - \texttt{FPR}(y, \hat{y}) = \frac{FP}{N}} = \frac{FP}{TN + FP}} = 1 - TNR + \texttt{FPR} = \frac{FP}{N}} = \frac{FP}{TN + FP}} = 1 - TNR - \texttt{TNR}(y, \hat{y}) = \frac{TN}{N}} = \frac{TN}{TN + FP}} = 1 - FPR + \texttt{TNR} = \frac{TN}{N}} = \frac{TN}{TN + FP}} = 1 - FPR - \texttt{FNR}(y, \hat{y}) = \frac{FN}{P}} = \frac{FN}{TP + FN}} = 1 - TPR + \texttt{FNR} = \frac{FN}{P}} = \frac{FN}{TP + FN}} = 1 - TPR >>> from sklearn.metrics import tpr_fpr_tnr_fnr_score >>> y_true = [2, 0, 2, 2, 0, 1] @@ -872,7 +872,6 @@ and is implemented by the :func:`specificity_score`. for an example of :func:`precision_recall_curve` usage to evaluate classifier output quality. - .. topic:: References: .. [Manning2008] C.D. Manning, P. Raghavan, H. Schütze, `Introduction to Information Retrieval @@ -889,7 +888,6 @@ and is implemented by the :func:`specificity_score`. `_, NIPS 2015. - Binary classification ^^^^^^^^^^^^^^^^^^^^^ @@ -913,11 +911,11 @@ In this context, we can define the notions of precision, recall and F-measure: .. math:: - \text{precision} = \frac{tp}{tp + fp}, + \text{precision} = \frac{TP}{TP + FP}, .. math:: - \text{recall} = \frac{tp}{tp + fn}, + \text{recall} = \frac{TP}{TP + FN}, .. math:: @@ -959,10 +957,9 @@ Here are some small examples in binary classification:: >>> average_precision_score(y_true, y_scores) 0.83... - - Multiclass and multilabel classification ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + In multiclass and multilabel classification task, the notions of precision, recall, and F-measures can be applied to each label independently. 
There are a few ways to combine results across labels, @@ -1798,6 +1795,59 @@ the same does a lower Brier score loss always mean better calibration" and probability estimation." `_ Dagstuhl Seminar Proceedings. Schloss Dagstuhl-Leibniz-Zentrum fr Informatik (2008). +.. _true_negatives_metrics: + +Specificity and negative predictive value (NPV) +----------------------------------------------- + +`Specificity `_ +(also called selectivity or true negative rate) and +`NPV `_ +are both ratios of true negatives to, respectively, actual negatives and +predicted negatives in a classification task. + +Binary classification +^^^^^^^^^^^^^^^^^^^^^ + +In a binary classification task, specificity and NPV are defined simply as + +..math:: + + \text{specificity} = \frac{TN}{N}} = \frac{TN}{TN + FP}} + + \text{NPV} = \frac{TN}{TN + FN}} + +Multiclass and multilabel classification +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a multiclass or multilabel classification task, the notions of specificity +and NPV can be applied to each label independently. There are a few ways +to combine results across labels, specified by the ``average`` argument +to the :func:`specificity_score` and :func:`npv_score` functions, as described +:ref:`above `. + +To make this more explicit, consider the following examples: + >>> from sklearn.metrics import specificity_score + >>> from sklearn.metrics import npv_score + >>> y_true = [2, 0, 2, 2, 0, 1] + >>> y_pred = [0, 0, 2, 2, 0, 2] + >>> specificity_score(y_true, y_pred, average=None) + >>> array([0.75, 1.0, 0.66...]) + >>> npv_score(y_true, y_pred, average=None) + >>> array([1.0, 0.83..., 0.66...]) + >>> specificity_score(y_true, y_pred, average='macro') + >>> 0.8055... + >>> npv_score(y_true, y_pred, average='macro') + >>> 0.83... + >>> specificity_score(y_true, y_pred, average='micro') + >>> 0.83... + >>> npv_score(y_true, y_pred, average='micro') + >>> 0.83... + >>> specificity_score(y_true, y_pred, average='weighted') + >>> 0.75 + >>> npv_score(y_true, y_pred, average='weighted') + >>> 0.8055... + .. _multilabel_ranking_metrics: Multilabel ranking metrics From eb9e340b15d4f2170aa8ce116d0e4c8618e916fe Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 1 Feb 2022 13:41:56 +0100 Subject: [PATCH 109/127] Update model_evaluation.rst Update docs. --- doc/modules/model_evaluation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 1aa174cb48bbe..79131ecd1717a 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -1836,7 +1836,7 @@ To make this more explicit, consider the following examples: >>> npv_score(y_true, y_pred, average=None) >>> array([1.0, 0.83..., 0.66...]) >>> specificity_score(y_true, y_pred, average='macro') - >>> 0.8055... + >>> 0.805... >>> npv_score(y_true, y_pred, average='macro') >>> 0.83... >>> specificity_score(y_true, y_pred, average='micro') @@ -1846,7 +1846,7 @@ To make this more explicit, consider the following examples: >>> specificity_score(y_true, y_pred, average='weighted') >>> 0.75 >>> npv_score(y_true, y_pred, average='weighted') - >>> 0.8055... + >>> 0.805... .. _multilabel_ranking_metrics: From 8687508d04ec6874147f0c5d6af853f6fd899e25 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 1 Feb 2022 14:12:21 +0100 Subject: [PATCH 110/127] Update model_evaluation.rst Fix docs. 
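The doctest outputs shown in the specificity / NPV section above can be
cross-checked without the new scorers; a minimal sketch, assuming only numpy
and the existing multilabel_confusion_matrix:

    import numpy as np
    from sklearn.metrics import multilabel_confusion_matrix

    y_true = [2, 0, 2, 2, 0, 1]
    y_pred = [0, 0, 2, 2, 0, 2]

    # One-vs-rest counts per class; each 2x2 block is [[TN, FP], [FN, TP]].
    mcm = multilabel_confusion_matrix(y_true, y_pred)
    tn, fp, fn = mcm[:, 0, 0], mcm[:, 0, 1], mcm[:, 1, 0]

    specificity = tn / (tn + fp)  # [0.75, 1., 0.66...]
    npv = tn / (tn + fn)          # [1., 0.83..., 0.66...]
    print(specificity.mean())     # macro specificity: 0.805...
    print(npv.mean())             # macro NPV: 0.83...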
--- doc/modules/model_evaluation.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 79131ecd1717a..0064ea16ba5b0 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -1832,21 +1832,21 @@ To make this more explicit, consider the following examples: >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> specificity_score(y_true, y_pred, average=None) - >>> array([0.75, 1.0, 0.66...]) + array([0.75, 1.0, 0.66...]) >>> npv_score(y_true, y_pred, average=None) - >>> array([1.0, 0.83..., 0.66...]) + array([1.0, 0.83..., 0.66...]) >>> specificity_score(y_true, y_pred, average='macro') - >>> 0.805... + 0.805... >>> npv_score(y_true, y_pred, average='macro') - >>> 0.83... + 0.83... >>> specificity_score(y_true, y_pred, average='micro') - >>> 0.83... + 0.83... >>> npv_score(y_true, y_pred, average='micro') - >>> 0.83... + 0.83... >>> specificity_score(y_true, y_pred, average='weighted') - >>> 0.75 + 0.75 >>> npv_score(y_true, y_pred, average='weighted') - >>> 0.805... + 0.805... .. _multilabel_ranking_metrics: From 13e7f98b049379f15cf64065bcabee627a6fbee2 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 1 Feb 2022 14:46:16 +0100 Subject: [PATCH 111/127] Update model_evaluation.rst Fix docs. --- doc/modules/model_evaluation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 0064ea16ba5b0..16edcfe6d4a21 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -490,10 +490,10 @@ The rates are defined as >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> tpr_fpr_tnr_fnr_score(y_true, y_pred) - (array([1. , 0. , 0.66666667]), - array([0.25 , 0. , 0.33333333]), - array([0.75 , 1. , 0.66666667]), - array([0. , 1. , 0.33333333])) + (array([1., 0., 0.66...]), + array([0.25, 0., 0.33...]), + array([0.75, 1., 0.66...]), + array([0., 1., 0.33...])) .. note:: @@ -1832,9 +1832,9 @@ To make this more explicit, consider the following examples: >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> specificity_score(y_true, y_pred, average=None) - array([0.75, 1.0, 0.66...]) + array([0.75, 1., 0.66...]) >>> npv_score(y_true, y_pred, average=None) - array([1.0, 0.83..., 0.66...]) + array([1., 0.83..., 0.66...]) >>> specificity_score(y_true, y_pred, average='macro') 0.805... >>> npv_score(y_true, y_pred, average='macro') From 1f60a9c11a86304ab50aea5038efc42d90bead4e Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 1 Feb 2022 15:20:30 +0100 Subject: [PATCH 112/127] Update model_evaluation.rst Fix docs. --- doc/modules/model_evaluation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 16edcfe6d4a21..0f0753a9fc201 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -490,10 +490,10 @@ The rates are defined as >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> tpr_fpr_tnr_fnr_score(y_true, y_pred) - (array([1., 0., 0.66...]), - array([0.25, 0., 0.33...]), - array([0.75, 1., 0.66...]), - array([0., 1., 0.33...])) + (array([1. , 0. , 0.66666667]), + array([0.25 , 0. , 0.33333333]), + array([0.75 , 1. , 0.66666667]), + array([0. , 1. , 0.33333333])) .. 
note:: @@ -1832,9 +1832,9 @@ To make this more explicit, consider the following examples: >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> specificity_score(y_true, y_pred, average=None) - array([0.75, 1., 0.66...]) + array([0.75 , 1. , 0.66666667]) >>> npv_score(y_true, y_pred, average=None) - array([1., 0.83..., 0.66...]) + array([1. , 0.83333333, 0.66666667]) >>> specificity_score(y_true, y_pred, average='macro') 0.805... >>> npv_score(y_true, y_pred, average='macro') From 0d87bf09ed854aa391bdc0e4e626a00eef9dd8ba Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 27 Sep 2022 11:20:09 +0200 Subject: [PATCH 113/127] Update model_evaluation.rst Align the PR docs with the main branch. --- doc/modules/model_evaluation.rst | 206 +++++++++++++++---------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index ef55634f1a47f..1db800dcf07b5 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -404,109 +404,6 @@ array of class labels, multilabel data is specified as an indicator matrix, in which cell ``[i, j]`` has value 1 if sample ``i`` has label ``j`` and value 0 otherwise. -.. _confusion_matrix: - -Confusion matrix ----------------- - -The :func:`confusion_matrix` function evaluates -classification accuracy by computing the `confusion matrix -`_ with each row corresponding -to the true class (Wikipedia and other references may use different convention -for axes). - -By definition, entry :math:`i, j` in a confusion matrix is -the number of observations actually in group :math:`i`, but -predicted to be in group :math:`j`. Here is an example:: - - >>> from sklearn.metrics import confusion_matrix - >>> y_true = [2, 0, 2, 2, 0, 1] - >>> y_pred = [0, 0, 2, 2, 0, 2] - >>> confusion_matrix(y_true, y_pred) - array([[2, 0, 0], - [0, 0, 1], - [1, 0, 2]]) - -:class:`ConfusionMatrixDisplay` can be used to visually represent a confusion -matrix as shown in the -:ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` -example, which creates the following figure: - -.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_confusion_matrix_001.png - :target: ../auto_examples/model_selection/plot_confusion_matrix.html - :scale: 75 - :align: center - -The parameter ``normalize`` allows to report ratios instead of counts. The -confusion matrix can be normalized in 3 different ways: ``'pred'``, ``'true'``, -and ``'all'`` which will divide the counts by the sum of each columns, rows, or -the entire matrix, respectively. - - >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] - >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] - >>> confusion_matrix(y_true, y_pred, normalize='all') - array([[0.25 , 0.125], - [0.25 , 0.375]]) - -For binary problems, we can get counts of true negatives, false positives, -false negatives and true positives as follows:: - - >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] - >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] - >>> tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() - >>> tn, fp, fn, tp - (2, 1, 2, 3) - -.. topic:: Example: - - * See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` - for an example of using a confusion matrix to evaluate classifier output - quality. - - * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` - for an example of using a confusion matrix to classify - hand-written digits. 
- - * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - for an example of using a confusion matrix to classify text - documents. - -.. _tpr_fpr_tnr_fnr_score: - -TPR FPR TNR FNR score ---------------------- - -The :func:`tpr_fpr_tnr_fnr_score` function computes the true positive rate (TPR), -false positive rate (FPR), true negative rate (TNR) and false negative rate (FNR) -of predictions, based on the `confusion matrix `_. -The rates are defined as - -.. math:: - - \texttt{TPR} = \frac{TP}{P}} = \frac{TP}{TP + FN}} = 1 - FNR - - \texttt{FPR} = \frac{FP}{N}} = \frac{FP}{TN + FP}} = 1 - TNR - - \texttt{TNR} = \frac{TN}{N}} = \frac{TN}{TN + FP}} = 1 - FPR - - \texttt{FNR} = \frac{FN}{P}} = \frac{FN}{TP + FN}} = 1 - TPR - - >>> from sklearn.metrics import tpr_fpr_tnr_fnr_score - >>> y_true = [2, 0, 2, 2, 0, 1] - >>> y_pred = [0, 0, 2, 2, 0, 2] - >>> tpr_fpr_tnr_fnr_score(y_true, y_pred) - (array([1. , 0. , 0.66666667]), - array([0.25 , 0. , 0.33333333]), - array([0.75 , 1. , 0.66666667]), - array([0. , 1. , 0.33333333])) - -.. note:: - - * True positive rate (TPR) is also called recall, sensitivity, or hit rate. - * False positive rate (FPR) is also called fall-out. - * True negative rate (TNR) is also called specificity, or selectivity. - * false negative rate (FNR) is also called miss rate. - .. _accuracy_score: Accuracy score @@ -702,6 +599,109 @@ and not for more than two annotators. >>> cohen_kappa_score(y_true, y_pred) 0.4285714285714286 +.. _confusion_matrix: + +Confusion matrix +---------------- + +The :func:`confusion_matrix` function evaluates +classification accuracy by computing the `confusion matrix +`_ with each row corresponding +to the true class (Wikipedia and other references may use different convention +for axes). + +By definition, entry :math:`i, j` in a confusion matrix is +the number of observations actually in group :math:`i`, but +predicted to be in group :math:`j`. Here is an example:: + + >>> from sklearn.metrics import confusion_matrix + >>> y_true = [2, 0, 2, 2, 0, 1] + >>> y_pred = [0, 0, 2, 2, 0, 2] + >>> confusion_matrix(y_true, y_pred) + array([[2, 0, 0], + [0, 0, 1], + [1, 0, 2]]) + +:class:`ConfusionMatrixDisplay` can be used to visually represent a confusion +matrix as shown in the +:ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` +example, which creates the following figure: + +.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_confusion_matrix_001.png + :target: ../auto_examples/model_selection/plot_confusion_matrix.html + :scale: 75 + :align: center + +The parameter ``normalize`` allows to report ratios instead of counts. The +confusion matrix can be normalized in 3 different ways: ``'pred'``, ``'true'``, +and ``'all'`` which will divide the counts by the sum of each columns, rows, or +the entire matrix, respectively. + + >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] + >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] + >>> confusion_matrix(y_true, y_pred, normalize='all') + array([[0.25 , 0.125], + [0.25 , 0.375]]) + +For binary problems, we can get counts of true negatives, false positives, +false negatives and true positives as follows:: + + >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] + >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] + >>> tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() + >>> tn, fp, fn, tp + (2, 1, 2, 3) + +.. 
topic:: Example: + + * See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` + for an example of using a confusion matrix to evaluate classifier output + quality. + + * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` + for an example of using a confusion matrix to classify + hand-written digits. + + * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` + for an example of using a confusion matrix to classify text + documents. + +.. _tpr_fpr_tnr_fnr_score: + +TPR FPR TNR FNR score +--------------------- + +The :func:`tpr_fpr_tnr_fnr_score` function computes the true positive rate (TPR), +false positive rate (FPR), true negative rate (TNR) and false negative rate (FNR) +of predictions, based on the `confusion matrix `_. +The rates are defined as + +.. math:: + + \texttt{TPR} = \frac{TP}{P}} = \frac{TP}{TP + FN}} = 1 - FNR + + \texttt{FPR} = \frac{FP}{N}} = \frac{FP}{TN + FP}} = 1 - TNR + + \texttt{TNR} = \frac{TN}{N}} = \frac{TN}{TN + FP}} = 1 - FPR + + \texttt{FNR} = \frac{FN}{P}} = \frac{FN}{TP + FN}} = 1 - TPR + + >>> from sklearn.metrics import tpr_fpr_tnr_fnr_score + >>> y_true = [2, 0, 2, 2, 0, 1] + >>> y_pred = [0, 0, 2, 2, 0, 2] + >>> tpr_fpr_tnr_fnr_score(y_true, y_pred) + (array([1. , 0. , 0.66666667]), + array([0.25 , 0. , 0.33333333]), + array([0.75 , 1. , 0.66666667]), + array([0. , 1. , 0.33333333])) + +.. note:: + + * True positive rate (TPR) is also called recall, sensitivity, or hit rate. + * False positive rate (FPR) is also called fall-out. + * True negative rate (TNR) is also called specificity, or selectivity. + * false negative rate (FNR) is also called miss rate. + .. _classification_report: Classification report From 3cb2c1c58417ec745ac1448a921abfd44a7f2e97 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 27 Sep 2022 11:23:59 +0200 Subject: [PATCH 114/127] Update v1.2.rst --- doc/whats_new/v1.2.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 2abed8b90b850..ab3b1efef9464 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -382,6 +382,11 @@ Changelog (`average="micro"`) for the One-vs-Rest multiclass case (`multi_class="ovr"`). :pr:`24338` by :user:`Arturo Amor `. +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_score`, + :func:`metrics.specificity_score` and :func:`metrics.npv_score`. + :pr:`19556` by :user:`Hao Chun Chang ` + and :user:`Pawel Kranzberg `. + :mod:`sklearn.model_selection` .............................. From f31bd337b546c87d8a85e993a1a5efb30203a0e1 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 27 Sep 2022 16:01:32 +0200 Subject: [PATCH 115/127] Update test_classification.py Align test_npv_warnings() and test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction() with pytest 7. --- sklearn/metrics/tests/test_classification.py | 34 +++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index dcc3dee62c5bf..40e768e9c3b04 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -547,15 +547,15 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): "Tpr is ill-defined and being set to 0.0 in labels with no positives samples." " Use `zero_division` parameter to control this behavior." 
) - expected_type_warning = UndefinedMetricWarning if zero_division == "warn" else None zero_division_value = 1.0 if zero_division == 1.0 else 0.0 - with pytest.warns(expected_type_warning) as record: + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter("always") tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average=None, zero_division=zero_division ) - if expected_type_warning is None: + if zero_division == "warn": assert len(record) == 0 else: assert str(record.pop().message) == msg @@ -564,11 +564,12 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) assert_array_almost_equal(fnr, [1.0, 0.5, 0.0, zero_division_value], 2) - with pytest.warns(expected_type_warning) as record: + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter("always") tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="macro", zero_division=zero_division ) - if expected_type_warning is None: + if zero_division == "warn": assert len(record) == 0 else: assert str(record.pop().message) == msg @@ -577,21 +578,22 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 0.91666, 5) assert_almost_equal(fnr, 0.625 if zero_division_value else 0.375) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter("error") tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="micro", zero_division=zero_division ) - assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.125) assert_almost_equal(tnr, 0.875) assert_almost_equal(fnr, 0.5) - with pytest.warns(expected_type_warning) as record: + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter("always") tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, average="weighted", zero_division=zero_division ) - if expected_type_warning is None: + if zero_division == "warn": assert len(record) == 0 else: assert str(record.pop().message) == msg @@ -600,7 +602,8 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): assert_almost_equal(tnr, 1.0) assert_almost_equal(fnr, 0.5) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter("error") tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score( y_true, y_pred, @@ -608,7 +611,6 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): sample_weight=[1, 1, 2], zero_division=zero_division, ) - assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0.08333, 5) assert_almost_equal(tnr, 0.91666, 5) @@ -2605,19 +2607,19 @@ def test_npv_warnings(zero_division): "Npv is ill-defined and being set to 0.0 due to no negative call samples." " Use `zero_division` parameter to control this behavior." 
) - expected_type_warning = UndefinedMetricWarning if zero_division == "warn" else None - with pytest.warns(expected_type_warning) as record: + with warnings.catch_warnings(record=True) as record: + warnings.simplefilter("always") npv_score( np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average="micro", zero_division=zero_division, ) - if expected_type_warning is None: - assert len(record) == 0 - else: + if zero_division == "warn": assert str(record[-1].message) == msg + else: + assert len(record) == 0 with pytest.warns(UndefinedMetricWarning, match=msg): npv_score([1, 1], [1, 1]) From 75edeedaaf4e728728b2dd856cb2c41cb62f2601 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 27 Sep 2022 16:36:38 +0200 Subject: [PATCH 116/127] Update test_classification.py Fix test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction() --- sklearn/metrics/tests/test_classification.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0d640410fb0f9..80da2f2311ced 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -557,9 +557,9 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_true, y_pred, average=None, zero_division=zero_division ) if zero_division == "warn": - assert len(record) == 0 - else: assert str(record.pop().message) == msg + else: + assert len(record) == 0 assert_array_almost_equal(tpr, [0.0, 0.5, 1.0, zero_division_value], 2) assert_array_almost_equal(fpr, [0.0, 0.0, 0.0, 1 / 3.0], 2) assert_array_almost_equal(tnr, [1.0, 1.0, 1.0, 2 / 3.0], 2) @@ -571,9 +571,9 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_true, y_pred, average="macro", zero_division=zero_division ) if zero_division == "warn": - assert len(record) == 0 - else: assert str(record.pop().message) == msg + else: + assert len(record) == 0 assert_almost_equal(tpr, 0.625 if zero_division_value else 0.375) assert_almost_equal(fpr, 1 / 3.0 / 4.0) assert_almost_equal(tnr, 0.91666, 5) @@ -595,9 +595,9 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_true, y_pred, average="weighted", zero_division=zero_division ) if zero_division == "warn": - assert len(record) == 0 - else: assert str(record.pop().message) == msg + else: + assert len(record) == 0 assert_almost_equal(tpr, 0.5) assert_almost_equal(fpr, 0) assert_almost_equal(tnr, 1.0) From 831d424ff47c28e0ae9a0bb0e68ef1ff04c2154c Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 27 Sep 2022 17:09:16 +0200 Subject: [PATCH 117/127] Update test_classification.py Fix test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(). --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 80da2f2311ced..b99551f572581 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -545,7 +545,7 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]]) msg = ( - "Tpr is ill-defined and being set to 0.0 in labels with no positives samples." + "Fnr is ill-defined and being set to 0.0 in labels with no positives samples." " Use `zero_division` parameter to control this behavior." 
) From b036a50d0aa710768cc09c0e677617240652a834 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Fri, 26 May 2023 20:21:59 +0200 Subject: [PATCH 118/127] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 22382bdc68ff6..85147494afe2a 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -618,17 +618,6 @@ def test_tpr_fpr_tnr_fnr_score_with_an_empty_prediction(zero_division): assert_almost_equal(fnr, 0.5) -def test_average_precision_score_score_non_binary_class(): - # Test that average_precision_score function returns an error when trying - # to compute average_precision_score for multiclass task. - rng = check_random_state(404) - y_pred = rng.rand(10) - - # y_true contains three different class values - y_true = rng.randint(0, 3, size=10) - err_msg = "multiclass format is not supported" - - def test_average_precision_score_non_binary_class(): """Test multiclass-multiouptut for `average_precision_score`.""" y_true = np.array( From 3660888655e0fb6544ef68df645f248b44e669fc Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Thu, 22 Jun 2023 16:07:02 +0200 Subject: [PATCH 119/127] Update test_multiclass.py Linting fix --- sklearn/tests/test_multiclass.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 6d31d83a9d43e..8cbf58dd7d035 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -19,10 +19,10 @@ SGDClassifier, ) from sklearn.metrics import ( - npv_score, - precision_score, - recall_score, - specificity_score, + npv_score, + precision_score, + recall_score, + specificity_score, ) from sklearn.model_selection import GridSearchCV, cross_val_score from sklearn.multiclass import ( From 7e996603714953c64fe0f04096818584ddb11d6d Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 8 Jul 2023 07:51:13 +0200 Subject: [PATCH 120/127] Update v1.4.rst --- doc/whats_new/v1.4.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 4ba357c52d136..6986116719c14 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -78,6 +78,14 @@ Changelog - |Fix| :func:`feature_selection.mutual_info_regression` now correctly computes the result when `X` is of integer dtype. :pr:`26748` by :user:`Yao Xiao `. +:mod:`sklearn.metrics` +...................... + +- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_score`, + :func:`metrics.specificity_score` and :func:`metrics.npv_score`. + :pr:`19556` by :user:`Hao Chun Chang ` + and :user:`Pawel Kranzberg `. + :mod:`sklearn.tree` ................... From 7f74ff6f41a9ac48c630edc4b024621efd97abdf Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 8 Jul 2023 07:51:57 +0200 Subject: [PATCH 121/127] Update v1.2.rst --- doc/whats_new/v1.2.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 6f25fc3ba5dc3..4300e1449a45b 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -822,11 +822,6 @@ Changelog :func:`metrics.pairwise.manhattan_distances` is deprecated and will be removed in 1.4. :pr:`24630` by :user:`Rushil Desai `. 
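The warning texts asserted in these tests ("... is ill-defined and being set to 0.0 ...") come from the division guard that applies when a rate's denominator is empty, for example FNR for a label with no positive samples. Roughly, the guard behaves like the sketch below; scikit-learn's internal ``_prf_divide`` helper is private and richer (per-metric messages, ``warn_for``, the ``"warn"`` sentinel), so this is only an approximation of the idea::

    import warnings
    import numpy as np

    def safe_divide(numerator, denominator, zero_division=0.0):
        # Elementwise division that substitutes `zero_division` where the
        # denominator is zero and warns, instead of returning NaN.
        numerator = np.asarray(numerator, dtype=float)
        denominator = np.asarray(denominator, dtype=float)
        mask = denominator == 0
        result = np.divide(numerator, np.where(mask, 1.0, denominator))
        result[mask] = zero_division
        if mask.any():
            warnings.warn("ill-defined value(s) set to %r" % zero_division)
        return result

    # Class 1 has no positive samples (tp + fn == 0), so its FNR is ill-defined.
    tp = np.array([2.0, 0.0])
    fn = np.array([1.0, 0.0])
    fnr = safe_divide(fn, tp + fn)   # [0.333, 0.0] plus an "ill-defined" warning

The public ``zero_division`` parameter exercised by these tests controls exactly that substitution value and whether the warning is raised.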
-- |Feature| Add :func:`metrics.fpr_tpr_fnr_tnr_score`, - :func:`metrics.specificity_score` and :func:`metrics.npv_score`. - :pr:`19556` by :user:`Hao Chun Chang ` - and :user:`Pawel Kranzberg `. - :mod:`sklearn.model_selection` .............................. @@ -1067,4 +1062,4 @@ Pitters, Tom Dupré la Tour, tomiock, Tom Mathews, Tom McTiernan, tspeng, Tyler Egashira, Valentin Laurent, Varun Jain, Vera Komeyer, Vicente Reyes-Puerta, Vinayak Mehta, Vincent M, Vishal, Vyom Pathak, wattai, wchathura, WEN Hao, William M, x110, Xiao Yuan, Xunius, yanhong-zhao-ef, Yusuf Raji, Z Adil Khwaja, -zeeshan lone \ No newline at end of file +zeeshan lone From 9e3fa2629fd321e5a185d0997b5f13d56d409fc2 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sat, 20 Apr 2024 06:05:46 +0200 Subject: [PATCH 122/127] Fix linting in test_classification.py --- sklearn/metrics/tests/test_classification.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 9d5ebe87a3373..109348e9de692 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2600,8 +2600,7 @@ def test_specificity_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "Tnr is ill-defined and " + str(record.pop().message) == "Tnr is ill-defined and " "being set to 0.0 due to no negatives samples." " Use `zero_division` parameter to control" " this behavior." @@ -2611,8 +2610,7 @@ def test_specificity_warnings(zero_division): specificity_score([1, 1], [1, 1]) assert ( - str(record.pop().message) - == "Tnr is ill-defined and " + str(record.pop().message) == "Tnr is ill-defined and " "being set to 0.0 due to no negatives samples." " Use `zero_division` parameter to control" " this behavior." From 582387cc4e0bcab835ba8d057cfdfbca059938b7 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Sun, 19 May 2024 11:33:53 +0200 Subject: [PATCH 123/127] Fix linting in _classification.py --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8b2b2091e1dc6..736e80693f109 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3892,4 +3892,4 @@ def npv_score( assert average != "binary" or len(NPV) == 1, "Non-binary target." 
NPV = np.average(NPV, weights=weights) - return NPV \ No newline at end of file + return NPV From 7877673206dfc14cfb8355d3702e4b2c16d8d456 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 18 Mar 2025 14:29:27 +0100 Subject: [PATCH 124/127] Remove the legacy assert_no_warnings in test_classification.py --- sklearn/metrics/tests/test_classification.py | 35 +++++++++++--------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a887896094e70..d088190c08c0a 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -387,7 +387,10 @@ def test_tpr_fpr_tnr_fnr_score_binary_averaged(): assert_array_almost_equal(tnrs, [0.68, 0.88], 2) assert_array_almost_equal(fnrs, [0.12, 0.32], 2) - tn, fp, fn, tp = assert_no_warnings(confusion_matrix, y_true, y_pred).ravel() + with warnings.catch_warnings(): + warnings.simplefilter("error") + tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() + assert_array_almost_equal(tp / (tp + fn), 0.68, 2) assert_array_almost_equal(fp / (tn + fp), 0.12, 2) assert_array_almost_equal(tn / (tn + fp), 0.88, 2) @@ -2715,13 +2718,14 @@ def test_fscore_warnings(zero_division): @pytest.mark.parametrize("zero_division", ["warn", 0, 1]) def test_specificity_warnings(zero_division): - assert_no_warnings( - specificity_score, - np.array([[0, 0], [0, 0]]), - np.array([[1, 1], [1, 1]]), - average="micro", - zero_division=zero_division, - ) + with warnings.catch_warnings(): + warnings.simplefilter("error") + specificity_score( + np.array([[0, 0], [0, 0]]), + np.array([[1, 1], [1, 1]]), + average="micro", + zero_division=zero_division, + ) with warnings.catch_warnings(record=True) as record: warnings.simplefilter("always") specificity_score( @@ -2878,13 +2882,14 @@ def test_npv_multiclass(): @pytest.mark.parametrize("zero_division", ["warn", 0, 1]) def test_npv_warnings(zero_division): - assert_no_warnings( - npv_score, - np.array([[1, 1], [1, 1]]), - np.array([[0, 0], [0, 0]]), - average="micro", - zero_division=zero_division, - ) + with warnings.catch_warnings(): + warnings.simplefilter("error") + npv_score( + np.array([[1, 1], [1, 1]]), + np.array([[0, 0], [0, 0]]), + average="micro", + zero_division=zero_division, + ) msg = ( "Npv is ill-defined and being set to 0.0 due to no negative call samples." From 9da387cb9260ce1f344f9de1141fb77195d0d89b Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 18 Mar 2025 16:16:11 +0100 Subject: [PATCH 125/127] Convert score values to float in _classification.py --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 68c0a115fcc7f..5c08b219a971e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3784,7 +3784,7 @@ def tpr_fpr_tnr_fnr_score( fpr = np.average(fpr, weights=weights) tnr = np.average(tnr, weights=weights) fnr = np.average(fnr, weights=weights) - return tpr, fpr, tnr, fnr + return float(tpr), float(fpr), float(tnr), float(fnr) def specificity_score( @@ -4090,4 +4090,4 @@ def npv_score( assert average != "binary" or len(NPV) == 1, "Non-binary target." 
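The rationale for returning ``float(...)`` here: ``np.average`` produces a NumPy scalar (``numpy.float64``) rather than a built-in ``float``, and converting it keeps the return type a plain Python float, which keeps reprs and doctest output stable across NumPy versions (NumPy 2 renders such scalars as ``np.float64(...)``). A quick illustration::

    import numpy as np

    value = np.average([0.75, 1.0, 0.66666667])
    type(value)         # <class 'numpy.float64'>
    type(float(value))  # <class 'float'>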
NPV = np.average(NPV, weights=weights) - return NPV + return float(NPV) From 5ee31a59796271c034b082221d67280fe90ed656 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 18 Mar 2025 17:48:43 +0100 Subject: [PATCH 126/127] Update _classification.py --- sklearn/metrics/_classification.py | 170 +++++++++++++++++++---------- 1 file changed, 111 insertions(+), 59 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 5c08b219a971e..b813f6dad1774 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3554,6 +3554,25 @@ def d2_log_loss_score(y_true, y_pred, *, sample_weight=None, labels=None): return float(1 - (numerator / denominator)) +@validate_params( + { + "y_true": ["array-like", "sparse matrix"], + "y_pred": ["array-like", "sparse matrix"], + "labels": ["array-like", None], + "pos_label": [Real, str, "boolean", None], + "average": [ + StrOptions({"binary", "macro", "micro", "samples", "weighted"}), + None, + ], + "warn_for": : [list, tuple, set], + "sample_weight": ["array-like", None], + "zero_division": [ + Options(Real, {0, 1}), + StrOptions({"warn"}), + ], + }, + prefer_skip_nested_validation=True, +) def tpr_fpr_tnr_fnr_score( y_true, y_pred, @@ -3607,13 +3626,13 @@ def tpr_fpr_tnr_fnr_score( labels are column indices. By default, all labels in `y_true` and `y_pred` are used in sorted order. - pos_label : str or int, default=1 - The class to report if `average="binary"` and the data is binary. - If the data are multiclass or multilabel, this will be ignored; - setting `labels=[pos_label]` and `average != "binary"` will report - scores for that label only. + pos_label : int, float, bool or str, default=1 + The class to report if `average='binary'` and the data is binary, + otherwise this parameter is ignored. + For multiclass or multilabel targets, set `labels=[pos_label]` and + `average != 'binary'` to report metrics for one label only. - average : {"binary", "micro", "macro", "samples", "weighted"} or None, \ + average : {"binary", "macro", "micro", "samples", "weighted"} or None, \ default=None If `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -3621,22 +3640,22 @@ def tpr_fpr_tnr_fnr_score( `"binary"`: Only report results for the class specified by `pos_label`. This is applicable only if targets (`y_{true,pred}`) are binary. - `"micro"`: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. `"macro"`: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. - `"weighted"`: - Calculate metrics for each label, and find their average weighted - by support (the number of true instances for each label). This - alters 'macro' to account for label imbalance. + `"micro"`: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. `"samples"`: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). + `"weighted"`: + Calculate metrics for each label, and find their average weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance. 
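How these ``average`` modes relate can be sketched with released scikit-learn primitives, assuming the per-class counts from :func:`multilabel_confusion_matrix` (shown here for TPR only; the other three rates follow the same pattern)::

    import numpy as np
    from sklearn.metrics import multilabel_confusion_matrix

    y_true = [2, 0, 2, 2, 0, 1]
    y_pred = [0, 0, 2, 2, 0, 2]

    mcm = multilabel_confusion_matrix(y_true, y_pred)   # [[TN, FP], [FN, TP]] per class
    tp, fn = mcm[:, 1, 1], mcm[:, 1, 0]
    support = tp + fn

    tpr = tp / (tp + fn)                         # average=None: one value per class
    macro = tpr.mean()                           # "macro": unweighted mean, ~0.556
    weighted = np.average(tpr, weights=support)  # "weighted": support-weighted, ~0.667
    micro = tp.sum() / (tp.sum() + fn.sum())     # "micro": pooled counts, ~0.667

For TPR the micro and weighted averages coincide, because weighting each per-class rate by its support and pooling the raw counts are the same operation when the denominator is the per-class support.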
- warn_for : tuple or set, for internal use + warn_for : list, tuple or set, for internal use This determines which warnings will be made in the case that this function is being used to return only one of its metrics. @@ -3759,8 +3778,11 @@ def tpr_fpr_tnr_fnr_score( fnr = _prf_divide( fn_sum, pos_sum, "FNR", "positives", average, warn_for, zero_division ) + if average is None: + return tpr, fpr, tnr, fnr + # Average the results - if average == "weighted": + elif average == "weighted": weights = pos_sum if weights.sum() == 0: zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 @@ -3772,21 +3794,36 @@ def tpr_fpr_tnr_fnr_score( zero_division_value if neg_sum.sum() == 0 else 0, zero_division_value if pos_sum.sum() == 0 else 0, ) - - elif average == "samples": + elif average == "samples" and sample_weight is not None: weights = sample_weight else: weights = None - - if average is not None: - assert average != "binary" or len(fpr) == 1, "Non-binary target." - tpr = np.average(tpr, weights=weights) - fpr = np.average(fpr, weights=weights) - tnr = np.average(tnr, weights=weights) - fnr = np.average(fnr, weights=weights) - return float(tpr), float(fpr), float(tnr), float(fnr) + assert average != "binary" or len(fpr) == 1, "Non-binary target." + tpr = float(np.average(tpr, weights=weights)) + fpr = float(np.average(fpr, weights=weights)) + tnr = float(np.average(tnr, weights=weights)) + fnr = float(np.average(fnr, weights=weights)) + return tpr, fpr, tnr, fnr +@validate_params( + { + "y_true": ["array-like", "sparse matrix"], + "y_pred": ["array-like", "sparse matrix"], + "labels": ["array-like", None], + "pos_label": [Real, str, "boolean", None], + "average": [ + StrOptions({"binary", "macro", "micro", "samples", "weighted"}), + None, + ], + "sample_weight": ["array-like", None], + "zero_division": [ + Options(Real, {0, 1}), + StrOptions({"warn"}), + ], + }, + prefer_skip_nested_validation=True, +) def specificity_score( y_true, y_pred, @@ -3829,13 +3866,13 @@ def specificity_score( labels are column indices. By default, all labels in `y_true` and `y_pred` are used in sorted order. - pos_label : str or int, default=1 - The class to report if `average="binary"` and the data is binary. - If the data are multiclass or multilabel, this will be ignored; - setting `labels=[pos_label]` and `average != "binary"` will report - scores for that label only. + pos_label : int, float, bool or str, default=1 + The class to report if `average='binary'` and the data is binary, + otherwise this parameter is ignored. + For multiclass or multilabel targets, set `labels=[pos_label]` and + `average != 'binary'` to report metrics for one label only. - average : {"binary", "micro", "macro", "samples", "weighted"} or None \ + average : {"binary", "macro", "micro", "samples", "weighted"} or None \ default="binary" This parameter is required for multiclass/multilabel targets. If `None`, the scores for each class are returned. Otherwise, this @@ -3844,21 +3881,21 @@ def specificity_score( `"binary"`: Only report results for the class specified by `pos_label`. This is applicable only if targets (`y_{true,pred}`) are binary. - `"micro"`: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. `"macro"`: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. + `"micro"`: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. 
+ `"samples"`: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). `"weighted"`: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. - `"samples"`: - Calculate metrics for each instance, and find their average (only - meaningful for multilabel classification where this differs from - :func:`accuracy_score`). sample_weight : array-like of shape (n_samples,), default=None Sample weights. @@ -3923,6 +3960,24 @@ def specificity_score( return tnr +@validate_params( + { + "y_true": ["array-like", "sparse matrix"], + "y_pred": ["array-like", "sparse matrix"], + "labels": ["array-like", None], + "pos_label": [Real, str, "boolean", None], + "average": [ + StrOptions({"binary", "macro", "micro", "samples", "weighted"}), + None, + ], + "sample_weight": ["array-like", None], + "zero_division": [ + Options(Real, {0, 1}), + StrOptions({"warn"}), + ], + }, + prefer_skip_nested_validation=True, +) def npv_score( y_true, y_pred, @@ -3963,13 +4018,13 @@ def npv_score( labels are column indices. By default, all labels in `y_true` and `y_pred` are used in sorted order. - pos_label : str or int, default=1 - The class to report if `average="binary"` and the data is binary. - If the data are multiclass or multilabel, this will be ignored; - setting `labels=[pos_label]` and `average != "binary"` will report - scores for that label only. + pos_label : int, float, bool or str, default=1 + The class to report if `average='binary'` and the data is binary, + otherwise this parameter is ignored. + For multiclass or multilabel targets, set `labels=[pos_label]` and + `average != 'binary'` to report metrics for one label only. - average : {"binary", "micro", "macro", "samples", "weighted"}, None \ + average : {"binary", "macro", "micro", "samples", "weighted"}, None \ default="binary" This parameter is required for multiclass/multilabel targets. If `None`, the scores for each class are returned. Otherwise, this @@ -3978,21 +4033,21 @@ def npv_score( `"binary"`: Only report results for the class specified by `pos_label`. This is applicable only if targets (`y_{true,pred}`) are binary. - `"micro"`: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. `"macro"`: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. + `"micro"`: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + `"samples"`: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). `"weighted"`: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. - `"samples"`: - Calculate metrics for each instance, and find their average (only - meaningful for multilabel classification where this differs from - :func:`accuracy_score`). sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
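To make the binary case concrete: the four counts from ``confusion_matrix(...).ravel()`` (used earlier in this series) give all of these quantities directly, and ``specificity_score`` / ``npv_score`` with ``average='binary'`` would be expected to agree with the hand-computed ratios::

    from sklearn.metrics import confusion_matrix

    y_true = [0, 0, 0, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 0, 1, 0, 1]

    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()  # (2, 1, 2, 3)

    specificity = tn / (tn + fp)   # 2 / 3, true negative rate
    npv = tn / (tn + fn)           # 2 / 4, negative predictive value
    precision = tp / (tp + fp)     # 3 / 4, PPV, shown for contrast
    recall = tp / (tp + fn)        # 3 / 5, TPR / sensitivity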
@@ -4066,7 +4121,6 @@ def npv_score( if average == "micro": tn_sum = np.array([tn_sum.sum()]) - fn_sum = np.array([fn_sum.sum()]) neg_calls_sum = np.array([neg_calls_sum.sum()]) # Divide, and on zero-division, set scores and/or warn according to @@ -4074,9 +4128,10 @@ def npv_score( NPV = _prf_divide( tn_sum, neg_calls_sum, "NPV", "negative call", average, "NPV", zero_division ) - + if average is None: + return NPV # Average the results - if average == "weighted": + elif average == "weighted": weights = pos_sum if weights.sum() == 0: zero_division_value = 0.0 if zero_division in ["warn", 0] else 1.0 @@ -4086,8 +4141,5 @@ def npv_score( weights = sample_weight else: weights = None - if average is not None: - assert average != "binary" or len(NPV) == 1, "Non-binary target." - NPV = np.average(NPV, weights=weights) - - return float(NPV) + assert average != "binary" or len(NPV) == 1, "Non-binary target." + return float(np.average(NPV, weights=weights)) From d4bbd825adb57745115a4129fc9b56ae2ea79255 Mon Sep 17 00:00:00 2001 From: Pawel Kranzberg Date: Tue, 18 Mar 2025 17:51:41 +0100 Subject: [PATCH 127/127] Update _classification.py --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b813f6dad1774..f78b5fece5bd3 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -3564,7 +3564,7 @@ def d2_log_loss_score(y_true, y_pred, *, sample_weight=None, labels=None): StrOptions({"binary", "macro", "micro", "samples", "weighted"}), None, ], - "warn_for": : [list, tuple, set], + "warn_for": [list, tuple, set], "sample_weight": ["array-like", None], "zero_division": [ Options(Real, {0, 1}),
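Taken together, the series adds three public entry points. A sketch of the intended usage, following the docstrings above (the imports only resolve on a branch containing these patches, not on a released scikit-learn; the expected values are the ones worked out in the documentation examples)::

    from sklearn.metrics import tpr_fpr_tnr_fnr_score, specificity_score, npv_score

    y_true = [2, 0, 2, 2, 0, 1]
    y_pred = [0, 0, 2, 2, 0, 2]

    # The default average=None returns four per-class arrays.
    tpr, fpr, tnr, fnr = tpr_fpr_tnr_fnr_score(y_true, y_pred)
    # tpr ~ [1.0, 0.0, 0.67], fpr ~ [0.25, 0.0, 0.33]
    # tnr ~ [0.75, 1.0, 0.67], fnr ~ [0.0, 1.0, 0.33]

    # Scalar summaries through the usual averaging modes.
    specificity_score(y_true, y_pred, average="macro")      # ~0.806
    npv_score(y_true, y_pred, average="macro")               # ~0.833
    specificity_score(y_true, y_pred, average="weighted")   # 0.75
    npv_score(y_true, y_pred, average="weighted")            # ~0.806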