Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sklearn/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,21 @@
hamming_loss,
hinge_loss,
jaccard_similarity_score,
kendall_tau_score,
log_loss,
matthews_corrcoef,
mean_squared_error,
mean_absolute_error,
mean_ndcg_score,
ndcg_score,
pairwise_accuracy_score,
precision_recall_curve,
precision_recall_fscore_support,
precision_score,
recall_score,
r2_score,
roc_curve,
spearman_rho_score,
zero_one_loss)


Expand Down
192 changes: 192 additions & 0 deletions sklearn/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@

from scipy.sparse import coo_matrix
from scipy.spatial.distance import hamming as sp_hamming
from scipy.stats import kendalltau
from scipy.stats import spearmanr

from ..externals.six.moves import zip
from ..externals.six.moves import xrange as range
from .metrics_fast import _pairwise_ranking_accuracy
from ..preprocessing import LabelBinarizer, label_binarize
from ..preprocessing import LabelEncoder
from ..utils import check_arrays
Expand Down Expand Up @@ -201,6 +204,195 @@ class UndefinedMetricWarning(UserWarning):
pass


###############################################################################
# Ranking metrics
###############################################################################

# FIXME: ROC-AUC and Average Precision are also ranking metrics.


def pairwise_accuracy_score(y_true, y_score):
    """Pairwise accuracy score.

    Returns the proportion of sample pairs which are predicted in the
    correct order by the model (pairs tied in ``y_true`` are ignored by
    the pair count).

    Parameters
    ----------
    y_true : array, shape (n_samples,)
        True targets, consisting of real or integer values.

    y_score : array, shape (n_samples,)
        Predicted targets, consisting of real values (output of
        decision_function or predict_proba).

    Returns
    -------
    pairwise_accuracy : float
        Fraction of correctly ordered pairs, in [0, 1].

    Note
    ----
    ROC-AUC is a special case of pairwise accuracy: the pairwise ranking
    accuracy is equal to AUC when y_true contains only two unique values.

    Reference
    ---------
    C.-P. Lee and C.-J. Lin.
    Large-scale Linear RankSVM.
    Neural Computation, 2013.
    """
    y_true, y_score = check_arrays(y_true, y_score, dtype=np.float64)

    # Counting is delegated to a Cython helper; n_total excludes tied pairs.
    n_correct, n_total = _pairwise_ranking_accuracy(y_true, y_score)

    if n_total == 0:
        # Every pair is tied in y_true: the metric is undefined.
        warnings.warn("pairwise_accuracy_score is undefined when all values"
                      " in y_true are the same.")
        return 0

    return n_correct / float(n_total)


def kendall_tau_score(y_true, y_score):
    """Kendall's tau score.

    Kendall's tau is a rank correlation coefficient in [-1, 1]: +1 means
    perfect positive correlation, -1 means perfect negative correlation
    and 0 means no correlation.

    Parameters
    ----------
    y_true : array, shape (n_samples,)
        True targets, consisting of real or integer values.

    y_score : array, shape (n_samples,)
        Predicted targets, consisting of real values (output of
        decision_function or predict_proba).

    Returns
    -------
    kendall_tau : float

    Note
    ----
    The implementation uses scipy.stats.kendalltau.

    Reference
    ---------
    http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient
    """
    # scipy returns (correlation, p-value); only the correlation is exposed.
    tau, _ = kendalltau(y_true, y_score)
    return tau


def spearman_rho_score(y_true, y_score):
    """Spearman's rho score.

    Spearman's rho is a rank correlation coefficient in [-1, 1]: +1 means
    perfect positive correlation, -1 means perfect negative correlation
    and 0 means no correlation.

    Parameters
    ----------
    y_true : array, shape (n_samples,)
        True targets, consisting of real or integer values.

    y_score : array, shape (n_samples,)
        Predicted targets, consisting of real values (output of
        decision_function or predict_proba).

    Returns
    -------
    spearman_rho : float

    Note
    ----
    The implementation uses scipy.stats.spearmanr.

    Reference
    ---------
    http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
    """
    # scipy returns (correlation, p-value); only the correlation is exposed.
    rho, _ = spearmanr(y_true, y_score)
    return rho


def _dcg(y_true, y_score, k=None, gains="exponential", ret_all=False):
if k is None:
k = len(y_score)

order = np.argsort(y_score)[::-1]
y_true = np.take(y_true, order[:k])

if gains == "exponential":
gains = 2 ** y_true - 1
elif gains == "linear":
gains = y_true
else:
raise ValueError("Invalid gains option.")

# highest rank is 1 so +2 instead of +1
discounts = np.log2(np.arange(len(y_true)) + 2)

if ret_all:
return np.cumsum(gains / discounts)
else:
return np.sum(gains / discounts)


def ndcg_score(y_true, y_score, k=None, gains="exponential"):
    """Normalized discounted cumulative gain (NDCG) at rank k.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Ground truth (true relevance labels).

    y_score : array-like, shape (n_samples,)
        Predicted scores.

    k : int or None
        Position.

    gains : str
        Whether gains should be "exponential" (default) or "linear".

    Returns
    -------
    NDCG @k : float

    Reference
    ---------
    http://en.wikipedia.org/wiki/Discounted_cumulative_gain
    """
    # The ideal DCG is obtained by ranking with the true labels themselves.
    ideal = _dcg(y_true, y_true, k, gains, ret_all=False)
    achieved = _dcg(y_true, y_score, k, gains, ret_all=False)
    return achieved / ideal


def mean_ndcg_score(y_true, y_score, k=None, gains="exponential"):
    """Mean of the NDCG scores from positions 1 to k.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Ground truth (true relevance labels).

    y_score : array-like, shape (n_samples,)
        Predicted scores.

    k : int or None
        Position.

    gains : str
        Whether gains should be "exponential" (default) or "linear".

    Returns
    -------
    mean NDCG @k : float
    """
    # Cumulative DCG at every position, for both the ideal and the
    # predicted ranking; averaging their ratios gives the mean NDCG.
    ideal = _dcg(y_true, y_true, k, gains, ret_all=True)
    achieved = _dcg(y_true, y_score, k, gains, ret_all=True)
    return np.mean(achieved / ideal)


###############################################################################
# Classification metrics
###############################################################################
Expand Down
Loading