 import torch
 import torch.nn as nn
 from data_utils.constant import Constants
+from model_utils import get_mask
+
 import numpy as np
+import logging
+
+logging.basicConfig(level=logging.INFO)
 
 class CRF_Loss(nn.Module):
     def __init__(self, tagset_size, config):
@@ -66,7 +71,8 @@ def log_likelihood(self, emissions, tags):
         log_p_y_x = self.get_log_p_Y_X(emissions, mask, tags)
         return log_p_y_x - log_z
 
-    def viterbi_decode(self, emissions, mask):
+    def viterbi_decode_batch(self, emissions, lengths):
+        mask = get_mask(lengths, self.config)
         seq_len = emissions.shape[1]
 
         log_prob = emissions[:, 0].clone()
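Aside: get_mask is newly imported from model_utils, but its definition is not part of this commit. As a point of reference only, here is a minimal sketch of what such a helper presumably does, assuming lengths is a 1-D tensor of per-sequence lengths and ignoring config:

import torch

def get_mask(lengths, config):
    # Hypothetical stand-in, not the repository's implementation: build a
    # (batch, max_len) float mask that is 1.0 at real token positions and
    # 0.0 at padding, from per-sequence lengths.
    lengths = lengths.long()
    max_len = int(lengths.max())
    positions = torch.arange(max_len, device=lengths.device).unsqueeze(0)
    return (positions < lengths.unsqueeze(1)).float()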
@@ -133,10 +139,52 @@ def viterbi_decode(self, emissions, mask):
 
         return sentence_score, torch.flip(all_labels, [1])
 
+    def viterbi_decode(self, emissions, lengths):
+        # Decode each sequence in the batch independently, then re-pad the
+        # best paths into a (batch, seq_len) tensor so that callers such as
+        # structural_perceptron_loss can compare them elementwise against
+        # the gold tags. Returns (best_scores, pred) in that order to match
+        # the call site below.
+        bsz, seq_len = emissions.shape[0], emissions.shape[1]
+        best_scores = emissions.new_zeros(bsz)
+        pred = torch.full((bsz, seq_len), Constants.TAG_PAD_ID,
+                          dtype=torch.long, device=emissions.device)
+
+        for i in range(bsz):
+            viterbi_paths, viterbi_score = self.viterbi_decode_single(
+                int(lengths[i]), emissions[i])
+            best = viterbi_paths[0]  # top_k defaults to 1
+            pred[i, :len(best)] = torch.tensor(best, device=emissions.device)
+            best_scores[i] = viterbi_score[0]
+
+        return best_scores, pred
+
+    def viterbi_decode_single(self, sequence_length, emission, top_k=1):
+        # emission: (seq_len, num_tags) scores for one (unpadded) sequence.
+        num_tags = emission.shape[1]
+        path_scores, path_indices = [], []
+        path_scores.append(emission[0, :].unsqueeze(0))
+        for timestep in range(1, sequence_length):
+            # (k, num_tags, 1) + (num_tags, num_tags) broadcasts to
+            # (k, num_tags, num_tags); flatten the history and previous-tag
+            # axes so topk picks the best predecessor for each current tag.
+            summed_potentials = path_scores[timestep - 1].unsqueeze(-1) + self.transitions
+            summed_potentials = summed_potentials.view(-1, num_tags)
+            scores, paths = torch.topk(summed_potentials, k=top_k, dim=0)
+            path_scores.append(emission[timestep, :] + scores.squeeze())
+            path_indices.append(paths.squeeze())
+
+        # Best final scores over the flattened (k * num_tags) candidates.
+        viterbi_score, best_paths = torch.topk(path_scores[-1].view(-1), k=top_k, dim=0)
+        viterbi_paths = []
+        for i in range(top_k):
+            # Follow the backpointers from the last timestep to the first.
+            viterbi_path = [int(best_paths[i])]
+            for backward_timestep in reversed(path_indices):
+                viterbi_path.append(int(backward_timestep.view(-1)[viterbi_path[-1]]))
+            viterbi_path.reverse()
+            # Backpointers index the flattened (k * num_tags) candidates;
+            # reduce them modulo num_tags to recover the tag ids.
+            viterbi_path = [j % num_tags for j in viterbi_path]
+            viterbi_paths.append(viterbi_path)
+        return viterbi_paths, viterbi_score
+
+
     def structural_perceptron_loss(self, emissions, tags):
         mask = tags.ne(Constants.TAG_PAD_ID).float()
-
-        best_scores, pred = self.viterbi_decode(emissions, mask)
+        sequence_lengths = mask.sum(dim=1)
+        best_scores, pred = self.viterbi_decode(emissions, sequence_lengths)
         log_p_y_x = self.get_log_p_Y_X(emissions, mask, tags)
 
         delta = torch.sum(tags.ne(pred).float() * mask, 1)
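Note on the hunk above: delta is a per-sentence Hamming distance, counting the positions where the Viterbi prediction disagrees with the gold tags over non-padding tokens. In a structured hinge (cost-augmented) loss this term typically serves as the margin, along the lines of max(0, best_scores + delta - log_p_y_x); the rest of the function is not shown in this diff, so that exact combination is an assumption, not the author's code.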
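Since viterbi_decode_single is easy to get subtly wrong (the backpointers index a flattened (k * num_tags) array), a brute-force reference is a useful sanity check. The sketch below scores every possible tag path directly, under the assumption that transitions[i, j] is the score of moving from tag i to tag j, as the recurrence above implies; shapes and values are illustrative, not from the repository:

import itertools
import torch

torch.manual_seed(0)
seq_len, num_tags = 5, 3
emission = torch.randn(seq_len, num_tags)
transitions = torch.randn(num_tags, num_tags)

# Score all num_tags ** seq_len paths and keep the best one.
best_score, best_path = None, None
for path in itertools.product(range(num_tags), repeat=seq_len):
    score = emission[0, path[0]]
    for t in range(1, seq_len):
        score = score + transitions[path[t - 1], path[t]] + emission[t, path[t]]
    if best_score is None or bool(score > best_score):
        best_score, best_path = score, list(path)

# With the module's transitions set to the same matrix, best_path should
# agree with viterbi_decode_single(seq_len, emission)[0][0], and best_score
# with the returned viterbi_score.
print(float(best_score), best_path)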