
Commit efaebba

Author: 卫苏
Commit message: add some common loss functions
1 parent d2c73f5 commit efaebba

10 files changed, +1134 -0 lines changed

loss/__init__.py

Whitespace-only changes.

loss/ada_scaling_loss.py

Lines changed: 62 additions & 0 deletions
import tensorflow as tf


def f1_reweight_loss(logits, labels, beta2):
    """From the paper "Adaptive Scaling for Sparse Detection in Information Extraction"."""
    m = tf.count_nonzero(labels, dtype=tf.float32)
    batch_size = tf.shape(labels)[0]
    n = tf.cast(batch_size, tf.float32) - m
    probs = tf.nn.softmax(logits)  # [batch_size, num_classes]
    batch_idx = tf.range(batch_size)
    label_with_idx = tf.concat([tf.expand_dims(t, 1) for t in [batch_idx, tf.cast(labels, tf.int32)]], 1)  # [batch_size, 2]
    golden_probs = tf.gather_nd(probs, label_with_idx)  # [batch_size]
    zeros = tf.zeros_like(golden_probs)
    is_negative = tf.equal(labels, 0)
    p1 = tf.reduce_sum(tf.where(is_negative, zeros, golden_probs))  # TP
    p2 = tf.reduce_sum(tf.where(is_negative, golden_probs, zeros))  # TN
    neg_weights = p1 / ((beta2 * m) + n - p2 + 1e-8)
    ones = tf.ones_like(golden_probs)
    weights = tf.where(is_negative, ones * neg_weights, ones)
    return tf.losses.sparse_softmax_cross_entropy(labels, logits, weights)


def f1_reweight_loss_v2(logits, labels, beta2):
    probs = tf.nn.softmax(logits)  # [batch_size, num_classes]
    labels = tf.cast(labels, tf.int32)
    negative_idx = tf.where(tf.equal(labels, 0), tf.ones_like(labels, dtype=tf.float32), tf.zeros_like(labels, dtype=tf.float32))
    positive_idx = 1.0 - negative_idx

    batch_idx = tf.range(tf.shape(probs)[0])
    label_with_idx = tf.concat([tf.expand_dims(t, 1) for t in [batch_idx, labels]], 1)
    golden_prob = tf.gather_nd(probs, label_with_idx)
    m = tf.reduce_sum(positive_idx)
    n = tf.reduce_sum(negative_idx)
    p1 = tf.reduce_sum(positive_idx * golden_prob)
    p2 = tf.reduce_sum(negative_idx * golden_prob)
    neg_weight = p1 / ((beta2 * m) + n - p2 + 1e-8)
    all_one = tf.ones(tf.shape(golden_prob))
    loss_weight = all_one * positive_idx + all_one * neg_weight * negative_idx

    loss = -loss_weight * tf.log(golden_prob + 1e-8)
    return loss


def f1_reweight_sigmoid_cross_entropy(logits, labels, beta_square, label_smoothing=0, weights=None):
    probs = tf.nn.sigmoid(logits)
    if len(labels.shape.as_list()) == 1:
        labels = tf.expand_dims(labels, -1)
    labels = tf.to_float(labels)
    batch_size = tf.shape(labels)[0]
    batch_size_float = tf.to_float(batch_size)
    num_pos = tf.reduce_sum(labels, axis=0)
    num_neg = batch_size_float - num_pos
    tp = tf.reduce_sum(probs, axis=0)
    tn = batch_size_float - tp
    neg_weight = tp / (beta_square * num_pos + num_neg - tn + 1e-8)
    neg_weight_tile = tf.tile(tf.expand_dims(neg_weight, 0), [batch_size, 1])
    final_weights = tf.where(tf.equal(labels, 1.0), tf.ones_like(labels), neg_weight_tile)
    if weights is not None:
        if len(weights.shape.as_list()) == 1:
            weights = tf.expand_dims(weights, -1)
        final_weights *= weights
    return tf.losses.sigmoid_cross_entropy(labels, logits, final_weights, label_smoothing=label_smoothing)
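
A minimal usage sketch (not part of this commit) showing how f1_reweight_loss might be wired into a TF1-style graph; the feature size, class count, learning rate, and beta2 value below are illustrative assumptions (beta2 is the squared beta of the targeted F-beta score, 1.0 for plain F1).

    # Hypothetical usage sketch, assuming TF 1.x graph mode and placeholder inputs.
    num_classes = 5                                              # illustrative value
    features = tf.placeholder(tf.float32, [None, 128])
    labels = tf.placeholder(tf.int64, [None])                    # label 0 is the negative / null class
    logits = tf.layers.dense(features, num_classes)

    loss = f1_reweight_loss(logits, labels, beta2=1.0)           # beta2 = 1.0 targets plain F1
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)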

loss/center_loss.py

Lines changed: 97 additions & 0 deletions
#!/usr/bin/env python
# coding: utf-8

import tensorflow as tf


def get_center_loss(features, labels, alpha, num_classes):
    """Get the center loss and the op that updates the class centers.

    Arguments:
        features: Tensor of sample features, usually the output of some fc layer, shape [batch_size, feature_length].
        labels: Tensor of sample labels, not one-hot encoded, shape [batch_size].
        alpha: a number between 0 and 1 that controls the learning rate of the class centers; see the original paper for details.
        num_classes: integer, the total number of classes, i.e. the number of output neurons of the classifier.

    Return:
        loss: Tensor, can be added to the softmax loss and optimized as the total loss.
        centers: Tensor storing the class centers; only useful for inspecting the stored center values.
        centers_update_op: op that updates the class centers; it must be run during training, otherwise the centers are never updated.
    """
    # Feature dimensionality, e.g. 256.
    len_features = features.get_shape()[1]
    # Create a Variable of shape [num_classes, len_features] to store the class centers of the whole network.
    # trainable=False because the centers are not updated by gradients.
    centers = tf.get_variable('centers', [num_classes, len_features], dtype=tf.float32, trainable=False,
                              initializer=tf.contrib.layers.xavier_initializer())
    # initializer=tf.constant_initializer(0))
    # Flatten labels to 1-D; unnecessary if the input is already 1-D.
    # labels = tf.reshape(labels, [-1])

    # Gather the center of each sample in the mini-batch according to its label.
    centers_batch = tf.gather(centers, labels)
    # Compute the loss.
    loss = tf.losses.mean_squared_error(features, centers_batch)

    # Difference between the features of the current mini-batch and their corresponding centers.
    diff = centers_batch - features

    # Count how many times each class appears in the mini-batch; see Eq. (4) of the original paper.
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff

    centers_update_op = tf.scatter_sub(centers, labels, diff)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, centers_update_op)
    return loss, centers, centers_update_op


# from facenet
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers


def AM_logits_compute(embeddings, label_batch, args, nrof_classes):
    """
    Loss head proposed in the paper "Additive Margin Softmax for Face Verification"
    link: https://arxiv.org/abs/1801.05599
    embeddings : normalized embedding layer of Facenet, i.e. the L2-normalized output of resface
    label_batch : ground truth labels of the current training batch
    args: arguments from the command line
    nrof_classes: number of classes
    """
    m = 0.35
    s = 30

    with tf.name_scope('AM_logits'):
        kernel = tf.get_variable(name='kernel', dtype=tf.float32, shape=[args.embedding_size, nrof_classes],
                                 initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        kernel_norm = tf.nn.l2_normalize(kernel, 0, 1e-10, name='kernel_norm')
        cos_theta = tf.matmul(embeddings, kernel_norm)
        cos_theta = tf.clip_by_value(cos_theta, -1, 1)  # for numerical stability
        phi = cos_theta - m
        label_onehot = tf.one_hot(label_batch, nrof_classes)
        adjust_theta = s * tf.where(tf.equal(label_onehot, 1), phi, cos_theta)

        return adjust_theta


if __name__ == '__main__':
    b = tf.constant([[1, 1], [2, 2], [3, 3]], dtype=tf.float32)
    with tf.Session() as sess:
        print(sess.run(tf.nn.l2_loss(b)))
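
A hedged usage sketch (not part of the commit): combining get_center_loss with a softmax loss and making sure the center-update op runs on every training step. The feature size, alpha, the 0.01 center-loss weight, and the optimizer settings are illustrative assumptions.

    num_classes = 10                                            # illustrative value
    features = tf.placeholder(tf.float32, [None, 256])          # e.g. output of a fc layer
    labels = tf.placeholder(tf.int64, [None])
    logits = tf.layers.dense(features, num_classes)

    center_loss_value, centers, centers_update_op = get_center_loss(features, labels, alpha=0.5, num_classes=num_classes)
    softmax_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)
    total_loss = softmax_loss + 0.01 * center_loss_value        # 0.01 is an illustrative center-loss weight

    # The centers are updated by their own op, not by gradients, so run it with each step.
    with tf.control_dependencies([centers_update_op]):
        train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)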

loss/circle_loss.py

Lines changed: 92 additions & 0 deletions
# coding=utf-8
import tensorflow as tf
import numpy as np


def circle_loss(pair_wise_cosine_matrix, pred_true_mask,
                pred_neg_mask,
                margin=0.25,
                gamma=64):
    """
    :param pair_wise_cosine_matrix: similarity matrix of all sample pairs
    :param pred_true_mask: mask matrix of the positive pairs
    :param pred_neg_mask: mask matrix of the negative pairs
    https://github.com/zhen8838/Circle-Loss/blob/master/circle_loss.py
    """
    O_p = 1 + margin   # optima from the paper (kept for reference)
    O_n = -margin

    Delta_p = 1 - margin
    Delta_n = margin

    ap = tf.nn.relu(-tf.stop_gradient(pair_wise_cosine_matrix * pred_true_mask) + 1 + margin)
    an = tf.nn.relu(tf.stop_gradient(pair_wise_cosine_matrix * pred_neg_mask) + margin)

    logit_p = -ap * (pair_wise_cosine_matrix - Delta_p) * gamma * pred_true_mask
    logit_n = an * (pair_wise_cosine_matrix - Delta_n) * gamma * pred_neg_mask

    logit_p = logit_p - (1 - pred_true_mask) * 1e12
    logit_n = logit_n - (1 - pred_neg_mask) * 1e12

    joint_neg_loss = tf.reduce_logsumexp(logit_n, axis=-1)
    joint_pos_loss = tf.reduce_logsumexp(logit_p, axis=-1)
    loss = tf.nn.softplus(joint_neg_loss + joint_pos_loss)
    return loss


def _get_anchor_positive_triplet_mask(labels):
    """Return a 2D mask where mask[a, p] is True iff a and p are distinct and have the same label.

    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    # Check that i and j are distinct
    indices_equal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
    indices_not_equal = tf.logical_not(indices_equal)

    # Check if labels[i] == labels[j]
    # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
    labels_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))

    # Combine the two masks
    mask = tf.logical_and(indices_not_equal, labels_equal)

    return mask


def _get_anchor_negative_triplet_mask(labels):
    """Return a 2D mask where mask[a, n] is True iff a and n have distinct labels.

    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    # Check if labels[i] != labels[k]
    # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
    labels_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))

    mask = tf.logical_not(labels_equal)

    return mask


input_tensor = tf.convert_to_tensor(np.random.random((10, 16)).astype(np.float32))
input_tensor = tf.nn.l2_normalize(input_tensor, axis=-1)
labels = tf.convert_to_tensor([1, 0, 2, 2, 1, 1, 4, 0, 4, 1])

# [10, 10]
pair_wise_cosine_matrix = tf.matmul(input_tensor, tf.transpose(input_tensor))

positive_mask = _get_anchor_positive_triplet_mask(labels)
negative_mask = _get_anchor_negative_triplet_mask(labels)

positive_mask = tf.cast(positive_mask, tf.float32)
negative_mask = tf.cast(negative_mask, tf.float32)

loss = circle_loss(pair_wise_cosine_matrix, positive_mask,
                   negative_mask,
                   margin=0.25,
                   gamma=64)
sess = tf.Session()
print(sess.run([positive_mask, negative_mask, loss]))
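
A small convenience wrapper (not part of the commit) sketching how the pieces above might be combined into a single scalar objective given embeddings and integer labels; the function name circle_loss_from_embeddings is hypothetical.

    # Hypothetical helper, assuming `embeddings` is [batch_size, dim] and `labels` is [batch_size] int.
    def circle_loss_from_embeddings(embeddings, labels, margin=0.25, gamma=64):
        embeddings = tf.nn.l2_normalize(embeddings, axis=-1)       # cosine similarity needs unit vectors
        sim = tf.matmul(embeddings, tf.transpose(embeddings))      # [batch_size, batch_size]
        pos_mask = tf.cast(_get_anchor_positive_triplet_mask(labels), tf.float32)
        neg_mask = tf.cast(_get_anchor_negative_triplet_mask(labels), tf.float32)
        per_anchor = circle_loss(sim, pos_mask, neg_mask, margin=margin, gamma=gamma)
        return tf.reduce_mean(per_anchor)                          # scalar training objective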

loss/cross_entropy_with_prior.py

Lines changed: 15 additions & 0 deletions
# coding: utf-8
import tensorflow as tf


def sparse_softmax_cross_entropy_with_prior(labels, logits, priors, tau=1.0):
    """Sparse softmax cross entropy with a class prior, from "Long-Tail Learning via Logit Adjustment".

    priors: shape [num_classes], the prior probability distribution over the classes.
    """
    log_priors = tf.math.log(priors)
    if len(log_priors.shape.as_list()) == 1:
        log_priors = tf.expand_dims(log_priors, 0)
    logits += tau * log_priors
    return tf.losses.sparse_softmax_cross_entropy(labels, logits)
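
A short usage sketch (not part of the commit): the priors would normally be the empirical class frequencies of the training set, as in the logit-adjustment paper; the counts, feature size, and dense layer below are purely illustrative.

    class_counts = [9000.0, 900.0, 100.0]                        # hypothetical long-tailed label counts
    priors = tf.constant([c / sum(class_counts) for c in class_counts], dtype=tf.float32)

    features = tf.placeholder(tf.float32, [None, 128])
    labels = tf.placeholder(tf.int64, [None])
    logits = tf.layers.dense(features, len(class_counts))
    loss = sparse_softmax_cross_entropy_with_prior(labels, logits, priors, tau=1.0)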

loss/focal_loss.py

Lines changed: 124 additions & 0 deletions
# coding: utf-8
"""Implements focal loss."""
# ____ __ ___ __ __ __ __ ____ ____
# ( __)/ \ / __) / _\ ( ) ( ) / \ / ___)/ ___)
# ) _)( O )( (__ / \/ (_/\ / (_/\( O )\___ \\___ \
# (__) \__/ \___)\_/\_/\____/ \____/ \__/ (____/(____/
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def sigmoid_focal_loss_with_logits(y_true, y_pred, alpha=0.25, gamma=2.0):
    """Implements the focal loss function.

    Focal loss was first introduced in the RetinaNet paper
    (https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for
    classification when you have highly imbalanced classes. It down-weights
    well-classified examples and focuses on hard examples. The loss value is
    much higher for a sample the classifier misclassifies than for a
    well-classified example. One of the best use cases of focal loss is object
    detection, where the imbalance between the background class and the other
    classes is extremely high.

    Args:
        y_true: true targets tensor (labels).
        y_pred: predictions tensor (logits).
        alpha: balancing factor.
        gamma: modulating factor.
    Returns:
        Weighted loss float `Tensor`, averaged over the last axis of `y_true`
        (shape [batch_size, 1] for [batch_size, num_classes] inputs).
    Raises:
        ValueError: If the value of `gamma` is less than zero.
    """
    if gamma and gamma < 0:
        raise ValueError("Value of gamma should be greater than or equal to zero")

    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Get the binary cross entropy
    bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred)

    # Convert the predictions into probabilities
    y_pred = tf.nn.sigmoid(y_pred)

    p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))
    alpha_factor = 1
    modulating_factor = 1

    if alpha:
        alpha = tf.convert_to_tensor(alpha, dtype=tf.float32)
        alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))

    if gamma:
        gamma = tf.convert_to_tensor(gamma, dtype=tf.float32)
        modulating_factor = tf.pow((1 - p_t), gamma)

    # Compute the final loss and return
    return tf.reduce_mean(alpha_factor * modulating_factor * bce, axis=-1, keepdims=True)


def softmax_focal_loss_with_logits(logits, labels, alpha=None, sample_weights=None, gamma=2.0, epsilon=1.e-7):
    """
    Args:
        logits: [batch_size, num_class]
        labels: [batch_size], not one-hot!
        alpha: [num_class], usually the proportion of samples belonging to the other
            classes, so the more samples a class has, the smaller its alpha.
    Returns:
        -alpha * (1 - y)^gamma * log(y)

    Where is the (1 - y) term realized? The probability selected by gather already
    plays that role rather than being computed explicitly: after softmax, the logits
    become per-class probabilities, which in the binary case are exactly P and 1 - P;
    in the multi-class case the selected value is no longer literally 1 - p.

    How is the alpha weight applied? alpha is selected with gather into a vector of
    batch length, which performs the selection and the reshape in a single step.
    """
    labels = tf.cast(labels, dtype=tf.int32)
    softmax = tf.reshape(tf.nn.softmax(logits), [-1])  # [batch_size * num_class]
    batch_size, num_class = get_shape_list(logits)
    # (N,) -> (N,), but the values change: each label becomes its flat index in N * num_class
    labels_shift = tf.range(0, batch_size) * num_class + labels
    # (N * num_class,) -> (N,)
    prob = tf.gather(softmax, labels_shift)  # probability of the true class
    # Guard against a predicted probability of 0; (N,)
    prob = tf.clip_by_value(prob, epsilon, 1. - epsilon)
    weights = tf.pow(tf.subtract(1., prob), gamma)

    if alpha is not None:
        alpha = tf.constant(alpha, dtype=tf.float32)  # (num_class,)
        # (num_class,) -> (N,)
        alpha_choice = tf.gather(alpha, labels)
        weights = tf.multiply(alpha_choice, weights)

    if sample_weights is not None:
        weights = tf.multiply(weights, sample_weights)
    return tf.losses.sparse_softmax_cross_entropy(labels, logits, weights=weights)


def get_shape_list(tensor):
    """Returns a list of the shape of tensor, preferring static dimensions.

    Args:
        tensor: A tf.Tensor object to find the shape of.

    Returns:
        A list of dimensions of the shape of tensor. All static dimensions will
        be returned as python integers, and dynamic dimensions will be returned
        as tf.Tensor scalars.
    """
    shape = tensor.shape.as_list()

    non_static_indexes = []
    for (index, dim) in enumerate(shape):
        if dim is None:
            non_static_indexes.append(index)

    if not non_static_indexes:
        return shape

    dyn_shape = tf.shape(tensor)
    for index in non_static_indexes:
        shape[index] = dyn_shape[index]
    return shape
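
A brief usage sketch (not part of the commit) for softmax_focal_loss_with_logits; the class count, feature size, and per-class alpha values below are illustrative assumptions, chosen so that more frequent classes get smaller weights.

    num_classes = 3                                              # illustrative value
    features = tf.placeholder(tf.float32, [None, 128])
    labels = tf.placeholder(tf.int64, [None])
    logits = tf.layers.dense(features, num_classes)
    # One alpha per class; larger classes get smaller weights (values are illustrative).
    alpha = [0.1, 0.3, 0.6]
    loss = softmax_focal_loss_with_logits(logits, labels, alpha=alpha, gamma=2.0)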
