
Commit efaebba

Author: 卫苏
Commit message: add some common loss functions
1 parent d2c73f5 commit efaebba

10 files changed, +1134 -0 lines changed

loss/__init__.py

Whitespace-only changes.

loss/ada_scaling_loss.py

Lines changed: 62 additions & 0 deletions
import tensorflow as tf


def f1_reweight_loss(logits, labels, beta2):
    """From the paper "Adaptive Scaling for Sparse Detection in Information Extraction"."""
    m = tf.count_nonzero(labels, dtype=tf.float32)
    batch_size = tf.shape(labels)[0]
    n = tf.cast(batch_size, tf.float32) - m
    probs = tf.nn.softmax(logits)  # [batch_size, num_classes]
    batch_idx = tf.range(batch_size)
    label_with_idx = tf.concat([tf.expand_dims(t, 1) for t in [batch_idx, tf.cast(labels, tf.int32)]], 1)  # [batch_size, 2]
    golden_probs = tf.gather_nd(probs, label_with_idx)  # [batch_size]
    zeros = tf.zeros_like(golden_probs)
    is_negative = tf.equal(labels, 0)
    p1 = tf.reduce_sum(tf.where(is_negative, zeros, golden_probs))  # TP
    p2 = tf.reduce_sum(tf.where(is_negative, golden_probs, zeros))  # TN
    neg_weights = p1 / ((beta2 * m) + n - p2 + 1e-8)
    ones = tf.ones_like(golden_probs)
    weights = tf.where(is_negative, ones * neg_weights, ones)
    return tf.losses.sparse_softmax_cross_entropy(labels, logits, weights)


def f1_reweight_loss_v2(logits, labels, beta2):
    probs = tf.nn.softmax(logits)  # [batch_size, num_classes]
    labels = tf.cast(labels, tf.int32)
    negative_idx = tf.where(tf.equal(labels, 0), tf.ones_like(labels, dtype=tf.float32), tf.zeros_like(labels, dtype=tf.float32))
    positive_idx = 1.0 - negative_idx

    batch_idx = tf.range(tf.shape(probs)[0])
    label_with_idx = tf.concat([tf.expand_dims(t, 1) for t in [batch_idx, labels]], 1)
    golden_prob = tf.gather_nd(probs, label_with_idx)
    m = tf.reduce_sum(positive_idx)
    n = tf.reduce_sum(negative_idx)
    p1 = tf.reduce_sum(positive_idx * golden_prob)
    p2 = tf.reduce_sum(negative_idx * golden_prob)
    neg_weight = p1 / ((beta2 * m) + n - p2 + 1e-8)
    all_one = tf.ones(tf.shape(golden_prob))
    loss_weight = all_one * positive_idx + all_one * neg_weight * negative_idx

    loss = -loss_weight * tf.log(golden_prob + 1e-8)
    return loss


def f1_reweight_sigmoid_cross_entropy(logits, labels, beta_square, label_smoothing=0, weights=None):
    probs = tf.nn.sigmoid(logits)
    if len(labels.shape.as_list()) == 1:
        labels = tf.expand_dims(labels, -1)
    labels = tf.to_float(labels)
    batch_size = tf.shape(labels)[0]
    batch_size_float = tf.to_float(batch_size)
    num_pos = tf.reduce_sum(labels, axis=0)
    num_neg = batch_size_float - num_pos
    tp = tf.reduce_sum(probs, axis=0)
    tn = batch_size_float - tp
    neg_weight = tp / (beta_square * num_pos + num_neg - tn + 1e-8)
    neg_weight_tile = tf.tile(tf.expand_dims(neg_weight, 0), [batch_size, 1])
    final_weights = tf.where(tf.equal(labels, 1.0), tf.ones_like(labels), neg_weight_tile)
    if weights is not None:
        if len(weights.shape.as_list()) == 1:
            weights = tf.expand_dims(weights, -1)
        final_weights *= weights
    return tf.losses.sigmoid_cross_entropy(labels, logits, final_weights, label_smoothing=label_smoothing)
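
A minimal usage sketch (not part of this commit) showing how f1_reweight_loss might be wired into a TF1-style graph; the feature size, class count, learning rate, and beta2 value below are illustrative assumptions (beta2 is the squared beta of the targeted F-beta score, 1.0 for plain F1).

    # Hypothetical usage sketch, assuming TF 1.x graph mode and placeholder inputs.
    num_classes = 5                                              # illustrative value
    features = tf.placeholder(tf.float32, [None, 128])
    labels = tf.placeholder(tf.int64, [None])                    # label 0 is the negative / null class
    logits = tf.layers.dense(features, num_classes)

    loss = f1_reweight_loss(logits, labels, beta2=1.0)           # beta2 = 1.0 targets plain F1
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)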

loss/center_loss.py

Lines changed: 97 additions & 0 deletions
#!/usr/bin/env python
# coding: utf-8

import tensorflow as tf


def get_center_loss(features, labels, alpha, num_classes):
    """Get the center loss and the op that updates the class centers.

    Arguments:
        features: Tensor of sample features, usually the output of some fc layer, shape [batch_size, feature_length].
        labels: Tensor of sample labels, not one-hot encoded, shape [batch_size].
        alpha: a number between 0 and 1 that controls the learning rate of the class centers; see the original paper for details.
        num_classes: integer, the total number of classes, i.e. the number of output neurons of the classifier.

    Return:
        loss: Tensor, can be added to the softmax loss and optimized as the total loss.
        centers: Tensor storing the class centers; only useful for inspecting the stored center values.
        centers_update_op: op that updates the class centers; it must be run during training, otherwise the centers are never updated.
    """
    # Feature dimensionality, e.g. 256.
    len_features = features.get_shape()[1]
    # Create a Variable of shape [num_classes, len_features] to store the class centers of the whole network.
    # trainable=False because the centers are not updated by gradients.
    centers = tf.get_variable('centers', [num_classes, len_features], dtype=tf.float32, trainable=False,
                              initializer=tf.contrib.layers.xavier_initializer())
    # initializer=tf.constant_initializer(0))
    # Flatten labels to 1-D; unnecessary if the input is already 1-D.
    # labels = tf.reshape(labels, [-1])

    # Gather the center of each sample in the mini-batch according to its label.
    centers_batch = tf.gather(centers, labels)
    # Compute the loss.
    loss = tf.losses.mean_squared_error(features, centers_batch)

    # Difference between the features of the current mini-batch and their corresponding centers.
    diff = centers_batch - features

    # Count how many times each class appears in the mini-batch; see Eq. (4) of the original paper.
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff

    centers_update_op = tf.scatter_sub(centers, labels, diff)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, centers_update_op)
    return loss, centers, centers_update_op


# from facenet
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers


def AM_logits_compute(embeddings, label_batch, args, nrof_classes):
    """
    Loss head proposed in the paper "Additive Margin Softmax for Face Verification"
    link: https://arxiv.org/abs/1801.05599
    embeddings : normalized embedding layer of Facenet, i.e. the L2-normalized output of resface
    label_batch : ground truth labels of the current training batch
    args: arguments from the command line
    nrof_classes: number of classes
    """
    m = 0.35
    s = 30

    with tf.name_scope('AM_logits'):
        kernel = tf.get_variable(name='kernel', dtype=tf.float32, shape=[args.embedding_size, nrof_classes],
                                 initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        kernel_norm = tf.nn.l2_normalize(kernel, 0, 1e-10, name='kernel_norm')
        cos_theta = tf.matmul(embeddings, kernel_norm)
        cos_theta = tf.clip_by_value(cos_theta, -1, 1)  # for numerical stability
        phi = cos_theta - m
        label_onehot = tf.one_hot(label_batch, nrof_classes)
        adjust_theta = s * tf.where(tf.equal(label_onehot, 1), phi, cos_theta)

        return adjust_theta


if __name__ == '__main__':
    b = tf.constant([[1, 1], [2, 2], [3, 3]], dtype=tf.float32)
    with tf.Session() as sess:
        print(sess.run(tf.nn.l2_loss(b)))
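
A hedged usage sketch (not part of the commit): combining get_center_loss with a softmax loss and making sure the center-update op runs on every training step. The feature size, alpha, the 0.01 center-loss weight, and the optimizer settings are illustrative assumptions.

    num_classes = 10                                            # illustrative value
    features = tf.placeholder(tf.float32, [None, 256])          # e.g. output of a fc layer
    labels = tf.placeholder(tf.int64, [None])
    logits = tf.layers.dense(features, num_classes)

    center_loss_value, centers, centers_update_op = get_center_loss(features, labels, alpha=0.5, num_classes=num_classes)
    softmax_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)
    total_loss = softmax_loss + 0.01 * center_loss_value        # 0.01 is an illustrative center-loss weight

    # The centers are updated by their own op, not by gradients, so run it with each step.
    with tf.control_dependencies([centers_update_op]):
        train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)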

loss/circle_loss.py

Lines changed: 92 additions & 0 deletions
# coding=utf-8
import tensorflow as tf
import numpy as np


def circle_loss(pair_wise_cosine_matrix, pred_true_mask,
                pred_neg_mask,
                margin=0.25,
                gamma=64):
    """
    :param pair_wise_cosine_matrix: similarity matrix of all sample pairs
    :param pred_true_mask: mask matrix of the positive pairs
    :param pred_neg_mask: mask matrix of the negative pairs
    https://github.com/zhen8838/Circle-Loss/blob/master/circle_loss.py
    """
    O_p = 1 + margin   # optima from the paper (kept for reference)
    O_n = -margin

    Delta_p = 1 - margin
    Delta_n = margin

    ap = tf.nn.relu(-tf.stop_gradient(pair_wise_cosine_matrix * pred_true_mask) + 1 + margin)
    an = tf.nn.relu(tf.stop_gradient(pair_wise_cosine_matrix * pred_neg_mask) + margin)

    logit_p = -ap * (pair_wise_cosine_matrix - Delta_p) * gamma * pred_true_mask
    logit_n = an * (pair_wise_cosine_matrix - Delta_n) * gamma * pred_neg_mask

    logit_p = logit_p - (1 - pred_true_mask) * 1e12
    logit_n = logit_n - (1 - pred_neg_mask) * 1e12

    joint_neg_loss = tf.reduce_logsumexp(logit_n, axis=-1)
    joint_pos_loss = tf.reduce_logsumexp(logit_p, axis=-1)
    loss = tf.nn.softplus(joint_neg_loss + joint_pos_loss)
    return loss


def _get_anchor_positive_triplet_mask(labels):
    """Return a 2D mask where mask[a, p] is True iff a and p are distinct and have the same label.

    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    # Check that i and j are distinct
    indices_equal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
    indices_not_equal = tf.logical_not(indices_equal)

    # Check if labels[i] == labels[j]
    # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
    labels_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))

    # Combine the two masks
    mask = tf.logical_and(indices_not_equal, labels_equal)

    return mask


def _get_anchor_negative_triplet_mask(labels):
    """Return a 2D mask where mask[a, n] is True iff a and n have distinct labels.

    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    # Check if labels[i] != labels[k]
    # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
    labels_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))

    mask = tf.logical_not(labels_equal)

    return mask


input_tensor = tf.convert_to_tensor(np.random.random((10, 16)).astype(np.float32))
input_tensor = tf.nn.l2_normalize(input_tensor, axis=-1)
labels = tf.convert_to_tensor([1, 0, 2, 2, 1, 1, 4, 0, 4, 1])

# [10, 10]
pair_wise_cosine_matrix = tf.matmul(input_tensor, tf.transpose(input_tensor))

positive_mask = _get_anchor_positive_triplet_mask(labels)
negative_mask = _get_anchor_negative_triplet_mask(labels)

positive_mask = tf.cast(positive_mask, tf.float32)
negative_mask = tf.cast(negative_mask, tf.float32)

loss = circle_loss(pair_wise_cosine_matrix, positive_mask,
                   negative_mask,
                   margin=0.25,
                   gamma=64)
sess = tf.Session()
print(sess.run([positive_mask, negative_mask, loss]))
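
A small convenience wrapper (not part of the commit) sketching how the pieces above might be combined into a single scalar objective given embeddings and integer labels; the function name circle_loss_from_embeddings is hypothetical.

    # Hypothetical helper, assuming `embeddings` is [batch_size, dim] and `labels` is [batch_size] int.
    def circle_loss_from_embeddings(embeddings, labels, margin=0.25, gamma=64):
        embeddings = tf.nn.l2_normalize(embeddings, axis=-1)       # cosine similarity needs unit vectors
        sim = tf.matmul(embeddings, tf.transpose(embeddings))      # [batch_size, batch_size]
        pos_mask = tf.cast(_get_anchor_positive_triplet_mask(labels), tf.float32)
        neg_mask = tf.cast(_get_anchor_negative_triplet_mask(labels), tf.float32)
        per_anchor = circle_loss(sim, pos_mask, neg_mask, margin=margin, gamma=gamma)
        return tf.reduce_mean(per_anchor)                          # scalar training objective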

loss/cross_entropy_with_prior.py

Lines changed: 15 additions & 0 deletions
# coding: utf-8
import tensorflow as tf


def sparse_softmax_cross_entropy_with_prior(labels, logits, priors, tau=1.0):
    """Sparse softmax cross entropy with a class prior, from "Long-Tail Learning via Logit Adjustment".

    priors: shape [num_classes], the prior probability distribution over the classes.
    """
    log_priors = tf.math.log(priors)
    if len(log_priors.shape.as_list()) == 1:
        log_priors = tf.expand_dims(log_priors, 0)
    logits += tau * log_priors
    return tf.losses.sparse_softmax_cross_entropy(labels, logits)
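
A short usage sketch (not part of the commit): the priors would normally be the empirical class frequencies of the training set, as in the logit-adjustment paper; the counts, feature size, and dense layer below are purely illustrative.

    class_counts = [9000.0, 900.0, 100.0]                        # hypothetical long-tailed label counts
    priors = tf.constant([c / sum(class_counts) for c in class_counts], dtype=tf.float32)

    features = tf.placeholder(tf.float32, [None, 128])
    labels = tf.placeholder(tf.int64, [None])
    logits = tf.layers.dense(features, len(class_counts))
    loss = sparse_softmax_cross_entropy_with_prior(labels, logits, priors, tau=1.0)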

loss/focal_loss.py

Lines changed: 124 additions & 0 deletions
# coding: utf-8
"""Implements focal loss."""
# ____ __ ___ __ __ __ __ ____ ____
# ( __)/ \ / __) / _\ ( ) ( ) / \ / ___)/ ___)
# ) _)( O )( (__ / \/ (_/\ / (_/\( O )\___ \\___ \
# (__) \__/ \___)\_/\_/\____/ \____/ \__/ (____/(____/
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def sigmoid_focal_loss_with_logits(y_true, y_pred, alpha=0.25, gamma=2.0):
    """Implements the focal loss function.

    Focal loss was first introduced in the RetinaNet paper
    (https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for
    classification when you have highly imbalanced classes. It down-weights
    well-classified examples and focuses on hard examples. The loss value is
    much higher for a sample the classifier misclassifies than for a
    well-classified example. One of the best use cases of focal loss is object
    detection, where the imbalance between the background class and the other
    classes is extremely high.

    Args:
        y_true: true targets tensor (labels).
        y_pred: predictions tensor (logits).
        alpha: balancing factor.
        gamma: modulating factor.
    Returns:
        Weighted loss float `Tensor`, averaged over the last axis of `y_true`
        (shape [batch_size, 1] for [batch_size, num_classes] inputs).
    Raises:
        ValueError: If the value of `gamma` is less than zero.
    """
    if gamma and gamma < 0:
        raise ValueError("Value of gamma should be greater than or equal to zero")

    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Get the binary cross entropy
    bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred)

    # Convert the predictions into probabilities
    y_pred = tf.nn.sigmoid(y_pred)

    p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))
    alpha_factor = 1
    modulating_factor = 1

    if alpha:
        alpha = tf.convert_to_tensor(alpha, dtype=tf.float32)
        alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))

    if gamma:
        gamma = tf.convert_to_tensor(gamma, dtype=tf.float32)
        modulating_factor = tf.pow((1 - p_t), gamma)

    # Compute the final loss and return
    return tf.reduce_mean(alpha_factor * modulating_factor * bce, axis=-1, keepdims=True)


def softmax_focal_loss_with_logits(logits, labels, alpha=None, sample_weights=None, gamma=2.0, epsilon=1.e-7):
    """
    Args:
        logits: [batch_size, num_class]
        labels: [batch_size], not one-hot!
        alpha: [num_class], usually the proportion of samples belonging to the other
            classes, so the more samples a class has, the smaller its alpha.
    Returns:
        -alpha * (1 - y)^gamma * log(y)

    Where is the (1 - y) term realized? The probability selected by gather already
    plays that role rather than being computed explicitly: after softmax, the logits
    become per-class probabilities, which in the binary case are exactly P and 1 - P;
    in the multi-class case the selected value is no longer literally 1 - p.

    How is the alpha weight applied? alpha is selected with gather into a vector of
    batch length, which performs the selection and the reshape in a single step.
    """
    labels = tf.cast(labels, dtype=tf.int32)
    softmax = tf.reshape(tf.nn.softmax(logits), [-1])  # [batch_size * num_class]
    batch_size, num_class = get_shape_list(logits)
    # (N,) -> (N,), but the values change: each label becomes its flat index in N * num_class
    labels_shift = tf.range(0, batch_size) * num_class + labels
    # (N * num_class,) -> (N,)
    prob = tf.gather(softmax, labels_shift)  # probability of the true class
    # Guard against a predicted probability of 0; (N,)
    prob = tf.clip_by_value(prob, epsilon, 1. - epsilon)
    weights = tf.pow(tf.subtract(1., prob), gamma)

    if alpha is not None:
        alpha = tf.constant(alpha, dtype=tf.float32)  # (num_class,)
        # (num_class,) -> (N,)
        alpha_choice = tf.gather(alpha, labels)
        weights = tf.multiply(alpha_choice, weights)

    if sample_weights is not None:
        weights = tf.multiply(weights, sample_weights)
    return tf.losses.sparse_softmax_cross_entropy(labels, logits, weights=weights)


def get_shape_list(tensor):
    """Returns a list of the shape of tensor, preferring static dimensions.

    Args:
        tensor: A tf.Tensor object to find the shape of.

    Returns:
        A list of dimensions of the shape of tensor. All static dimensions will
        be returned as python integers, and dynamic dimensions will be returned
        as tf.Tensor scalars.
    """
    shape = tensor.shape.as_list()

    non_static_indexes = []
    for (index, dim) in enumerate(shape):
        if dim is None:
            non_static_indexes.append(index)

    if not non_static_indexes:
        return shape

    dyn_shape = tf.shape(tensor)
    for index in non_static_indexes:
        shape[index] = dyn_shape[index]
    return shape
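
A brief usage sketch (not part of the commit) for softmax_focal_loss_with_logits; the class count, feature size, and per-class alpha values below are illustrative assumptions, chosen so that more frequent classes get smaller weights.

    num_classes = 3                                              # illustrative value
    features = tf.placeholder(tf.float32, [None, 128])
    labels = tf.placeholder(tf.int64, [None])
    logits = tf.layers.dense(features, num_classes)
    # One alpha per class; larger classes get smaller weights (values are illustrative).
    alpha = [0.1, 0.3, 0.6]
    loss = softmax_focal_loss_with_logits(logits, labels, alpha=alpha, gamma=2.0)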
