From 59b2f4bce7cb5da740c1c1ce1f1c31c2d2699638 Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Wed, 1 May 2019 20:56:43 +0200 Subject: [PATCH 1/7] Clarified link between hamming loss and zero-one loss --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index fad416a7fc0ff..f5d393a03f106 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1997,8 +1997,8 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): labels. Hamming loss is more forgiving in that it penalizes the individual labels. - The Hamming loss is upperbounded by the subset zero-one loss. When - normalized over samples, the Hamming loss is always between 0 and 1. + The Hamming loss is upperbounded by the normalized zero-one loss. It is + always between 0 and 1, lower being better. References ---------- From a1f3d4a839912bb88edc0fe086a74bb3ffaf89ae Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Wed, 1 May 2019 21:07:13 +0200 Subject: [PATCH 2/7] Updated typo in hamming loss multilabel classification note --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index f5d393a03f106..95664f64019a4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1989,11 +1989,11 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): ----- In multiclass classification, the Hamming loss corresponds to the Hamming distance between ``y_true`` and ``y_pred`` which is equivalent to the - subset ``zero_one_loss`` function. + subset ``zero_one_loss`` function, when zero-one loss is normalized. In multilabel classification, the Hamming loss is different from the subset zero-one loss. 
The zero-one loss considers the entire set of labels - for a given sample incorrect if it does entirely match the true set of + for a given sample incorrect if it does not entirely match the true set of labels. Hamming loss is more forgiving in that it penalizes the individual labels. From f19590c8af0057feaaefe6755e2dac2c07637a39 Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Wed, 1 May 2019 21:12:05 +0200 Subject: [PATCH 3/7] Added precision in hamming loss notes --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 95664f64019a4..d9bbd665d240a 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1997,8 +1997,8 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): labels. Hamming loss is more forgiving in that it penalizes the individual labels. - The Hamming loss is upperbounded by the normalized zero-one loss. It is - always between 0 and 1, lower being better. + The Hamming loss is upperbounded by the subset zero-one loss, when zero-one + loss is normalized. It is always between 0 and 1, lower being better. References ---------- From a9b13c5a8440deda4abd5f7e2b4b85d1cbb9f366 Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Wed, 1 May 2019 21:12:42 +0200 Subject: [PATCH 4/7] Added precision in hamming loss notes --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index d9bbd665d240a..c196128e3cd75 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1994,8 +1994,8 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): In multilabel classification, the Hamming loss is different from the subset zero-one loss. 
The zero-one loss considers the entire set of labels for a given sample incorrect if it does not entirely match the true set of - labels. Hamming loss is more forgiving in that it penalizes the individual - labels. + labels. Hamming loss is more forgiving in that it penalizes only the + individual labels. The Hamming loss is upperbounded by the subset zero-one loss, when zero-one loss is normalized. It is always between 0 and 1, lower being better. From 2517c272b4d1cf2985d5b2460a9bc8998a517d6a Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Tue, 7 May 2019 20:42:51 +0200 Subject: [PATCH 5/7] clarified normalized term --- sklearn/metrics/classification.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 8397c5531dc27..a399171cd1e73 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1989,7 +1989,7 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): ----- In multiclass classification, the Hamming loss corresponds to the Hamming distance between ``y_true`` and ``y_pred`` which is equivalent to the - subset ``zero_one_loss`` function, when zero-one loss is normalized. + subset ``zero_one_loss`` function, when normalize parameter is set to True. In multilabel classification, the Hamming loss is different from the subset zero-one loss. The zero-one loss considers the entire set of labels @@ -1997,8 +1997,9 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): labels. Hamming loss is more forgiving in that it penalizes only the individual labels. - The Hamming loss is upperbounded by the subset zero-one loss, when zero-one - loss is normalized. It is always between 0 and 1, lower being better. + The Hamming loss is upperbounded by the subset zero-one loss, when + normalize parameter is set to True. It is always between 0 and 1, + lower being better. 
References ---------- From 37d214872aae38daf5020a3bca0faae937f8712f Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Tue, 7 May 2019 20:43:37 +0200 Subject: [PATCH 6/7] put normalize term in coding format --- sklearn/metrics/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index a399171cd1e73..25aa1dcc2d493 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1989,7 +1989,7 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): ----- In multiclass classification, the Hamming loss corresponds to the Hamming distance between ``y_true`` and ``y_pred`` which is equivalent to the - subset ``zero_one_loss`` function, when normalize parameter is set to True. + subset ``zero_one_loss`` function, when `normalize` parameter is set to True. In multilabel classification, the Hamming loss is different from the subset zero-one loss. The zero-one loss considers the entire set of labels for a given sample incorrect if it does not entirely match the true set of labels. Hamming loss is more forgiving in that it penalizes only the individual labels. The Hamming loss is upperbounded by the subset zero-one loss, when - normalize parameter is set to True. It is always between 0 and 1, + `normalize` parameter is set to True. It is always between 0 and 1, lower being better. 
References From 6dacd1ec9ad8641287a15a1ce4732c025c1355e9 Mon Sep 17 00:00:00 2001 From: Xavier SATTLER Date: Tue, 7 May 2019 20:45:39 +0200 Subject: [PATCH 7/7] respected linter --- sklearn/metrics/classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 25aa1dcc2d493..d1337bdc61aed 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1989,7 +1989,8 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None): ----- In multiclass classification, the Hamming loss corresponds to the Hamming distance between ``y_true`` and ``y_pred`` which is equivalent to the - subset ``zero_one_loss`` function, when `normalize` parameter is set to True. + subset ``zero_one_loss`` function, when `normalize` parameter is set to + True. In multilabel classification, the Hamming loss is different from the subset zero-one loss. The zero-one loss considers the entire set of labels