goingcoder
diff --git a/neural_ner/crf.py b/neural_ner/crf.py
Lines changed: 10 additions & 9 deletions
diff --git a/neural_ner/data_utils/vocab.py b/neural_ner/data_utils/vocab.py
Lines changed: 6 additions & 3 deletions
diff --git a/semantic_parsing/__init__.py b/semantic_parsing/__init__.py
diff --git a/semantic_parsing/data_utils/__init__.py b/semantic_parsing/data_utils/__init__.py
diff --git a/semantic_parsing/data_utils/batcher.py b/semantic_parsing/data_utils/batcher.py
@@ -42,8 +42,9 @@ def get_log_p_z(self, emissions, mask, seq_len):
         log_alpha += self.transitions[: self.start_tag, self.end_tag].unsqueeze(0)
         return torch.logsumexp(log_alpha.squeeze(1), 1)
 
-    def get_log_p_Y_X(self, emissions, mask, seq_len, tags):
-        tags[tags < 0] = 0 # clone and then set
+    def get_log_p_Y_X(self, emissions, mask, seq_len, orig_tags):
+        tags = orig_tags.clone()
+        tags[tags < 0] = 0
 
         llh = self.transitions[self.start_tag, tags[:, 0]].unsqueeze(1)
         llh += emissions[:, 0, :].gather(1, tags[:, 0].view(-1, 1)) * mask[:, 0].unsqueeze(1)
@@ -106,28 +107,28 @@ def viterbi_decode(self, emissions, lengths):
         _, max_indices_from_scores = torch.max(best_scores, 2)
 
         valid_index_tensor = torch.tensor(0).long()
-        padding_tensor = torch.tensor(Constants.PAD_ID).long()
+        padding_tensor = torch.tensor(Constants.TAG_PAD_ID).long()
 
         labels = max_indices_from_scores[:, seq_len - 1]
-        labels = torch.where(1.0 - mask[:, seq_len - 1], padding_tensor, labels)
+        labels = torch.where(mask[:, seq_len - 1] != 1.0, padding_tensor, labels)
         all_labels = labels.unsqueeze(1).long()
 
         for idx in range(seq_len - 2, -1, -1):
             indices_for_lookup = all_labels[:, -1].clone()
-            indices_for_lookup = torch.where(indices_for_lookup == self.ignore_index, valid_index_tensor,
+            indices_for_lookup = torch.where(indices_for_lookup == Constants.TAG_PAD_ID, valid_index_tensor,
                                              indices_for_lookup)
 
             indices_from_prev_pos = best_paths[:, idx, :].gather(1, indices_for_lookup.view(-1, 1).long()).squeeze(1)
-            indices_from_prev_pos = torch.where((1.0 - mask[:, idx + 1]), padding_tensor, indices_from_prev_pos)
+            indices_from_prev_pos = torch.where(mask[:, idx + 1] != 1.0, padding_tensor, indices_from_prev_pos)
 
             indices_from_max_scores = max_indices_from_scores[:, idx]
-            indices_from_max_scores = torch.where(mask[:, idx + 1], padding_tensor, indices_from_max_scores)
+            indices_from_max_scores = torch.where(mask[:, idx + 1] == 1.0, padding_tensor, indices_from_max_scores)
 
-            labels = torch.where(indices_from_max_scores == self.ignore_index, indices_from_prev_pos,
+            labels = torch.where(indices_from_max_scores == Constants.TAG_PAD_ID, indices_from_prev_pos,
                                  indices_from_max_scores)
 
             # Set to ignore_index if present state is not valid.
-            labels = torch.where((1 - mask[:, idx]),padding_tensor, labels)
+            labels = torch.where(mask[:, idx] != 1.0, padding_tensor, labels)
             all_labels = torch.cat((all_labels, labels.view(-1, 1).long()), 1)
 
         return best_scores, torch.flip(all_labels, [1])
 
@@ -61,12 +61,15 @@ def word_mapping(self, sentences):
         self.word_to_id = {v: k for k, v in id_to_word.items()}
         self.id_to_word = id_to_word
 
-        char_freq_map = create_freq_map(chars)
-
-        id_to_char = {i+start_vocab_len: v for i, v in enumerate(char_freq_map)}
+        id_to_char = {}
         for i, v in enumerate(Constants._START_VOCAB):
             id_to_char[i] = v
 
+        char_freq_map = create_freq_map(chars)
+
+        for v in char_freq_map:
+            id_to_char[len(id_to_char)] = v
+
         print("Found {} unique characters".format(len(char_freq_map)))
 
         self.char_to_id = {v: k for k, v in id_to_char.items()}