Commit 48ee4b1

refactor
1 parent 7d4b17e commit 48ee4b1

4 files changed: +18 -14 lines changed

neural_ner/config.py

Lines changed: 4 additions & 0 deletions
@@ -2,6 +2,8 @@
 import torch
 import numpy as np
 
+print('pytorch version', torch.__version__)
+
 np.random.seed(123)
 torch.manual_seed(123)
 if torch.cuda.is_available():
@@ -54,6 +56,8 @@ class Config(object):
 config.model_name = 'model.NER_SOFTMAX_CHAR'
 config.optimizer = 'sgd'
 
+config.use_pretrain_embd = True
+
 # config postprocess
 config.is_cuda = config.is_cuda and torch.cuda.is_available()
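The new use_pretrain_embd flag is consumed in vocab.py below. As a hedged usage sketch (the import path for Vocab and the False value are assumptions for illustration, not part of this commit), the flag can be flipped before the vocabulary is built, mirroring the from config import config / Vocab(config) pattern in vocab.py's __main__ block:

    from config import config
    from data_utils.vocab import Vocab   # import path assumed from the repo layout

    config.use_pretrain_embd = False     # example only: skip the GloVe lookup, e.g. for a smoke test
    vocab = Vocab(config)                # word_mapping() then falls back to an empty glove_vectors dict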

neural_ner/data_utils/vocab.py

Lines changed: 6 additions & 1 deletion
@@ -47,7 +47,11 @@ def word_mapping(self, sentences):
         id_to_word = {i + start_vocab_len: v[0] for i, v in enumerate(sorted_items)}
         '''
         #augmnet with pretrained words
-        self.glove_vectors = self.get_glove()
+        if self.config.use_pretrain_embd:
+            self.glove_vectors = self.get_glove()
+        else:
+            self.glove_vectors = {}
+
         word_freq_map.update(self.glove_vectors.keys())
 
         id_to_word = {}
@@ -102,6 +106,7 @@ def get_glove(self):
 
         return word_to_vector
 
+
 if __name__ == '__main__':
     from config import config
     vocab = Vocab(config)
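The effect of the new branch: word_freq_map is only augmented with GloVe keys when use_pretrain_embd is set; otherwise glove_vectors is an empty dict and the update is a no-op, leaving a corpus-only vocabulary. A small standalone sketch (the example counts and words are made up):

    from collections import Counter

    word_freq_map = Counter({'the': 10, 'cat': 3})   # stand-in for corpus counts
    use_pretrain_embd = False

    if use_pretrain_embd:
        glove_vectors = {'dog': [0.1, 0.2], 'the': [0.3, 0.4]}   # stand-in for get_glove()
    else:
        glove_vectors = {}

    # Same call as in word_mapping(): a no-op when the flag is off.
    word_freq_map.update(glove_vectors.keys())
    print(word_freq_map)   # Counter({'the': 10, 'cat': 3})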

neural_ner/model.py

Lines changed: 0 additions & 2 deletions
@@ -11,8 +11,6 @@
 from crf import CRF_Loss
 from model_utils import get_mask, init_lstm_wt, init_linear_wt, get_word_embd
 
-print('pytorch version', torch.__version__)
-
 logging.basicConfig(level=logging.INFO)
 
 class NER_SOFTMAX_CHAR(nn.Module):

neural_ner/train_utils.py

Lines changed: 8 additions & 11 deletions
@@ -111,23 +111,20 @@ def get_metric(self, log_dir, is_cf=False):
             print (line)
 
         # Confusion matrix with accuracy for each tag
-        print (("{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * self.n_tags)) % (
-            "ID", "NE", "Total",
-            *([self.vocab.id_to_tag[i] for i in range(self.n_tags)] + ["Percent"])
-        ))
+        format_str = "{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * self.n_tags)
+        values = [self.vocab.id_to_tag[i] for i in range(self.n_tags)]
+        print(format_str.format("ID", "NE", "Total", *values, "Percent"))
+
         for i in range(self.n_tags):
-            print (("{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * self.n_tags)) % (
-                str(i), self.vocab.id_to_tag[i], str(self.count[i].sum()),
-                *([self.count[i][j] for j in range(self.n_tags)] +
-                  ["%.3f" % (self.count[i][i] * 100. / max(1, self.count[i].sum()))])
-            ))
+            percent = "{:.3f}".format(self.count[i][i] * 100. / max(1, self.count[i].sum()))
+            values = [self.count[i][j] for j in range(self.n_tags)]
+            print (format_str.format(str(i), self.vocab.id_to_tag[i], str(self.count[i].sum()), *values, percent))
 
         # Global accuracy
-        print ("%i/%i (%.5f%%)" % (
+        print ("{}/{} ({:.3f} %)" .format (
             self.count.trace(), self.count.sum(), 100. * self.count.trace() / max(1, self.count.sum())
         ))
 
         # F1 on all entities
         return float(eval_lines[1].strip().split()[-1])
 
-
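The refactor in train_utils.py builds the str.format template once and reuses it for the header row and each per-tag row; the old code applied the % operator a second time to a template that no longer contained % conversion specifiers, which is what the rewrite appears to fix. A minimal sketch of the new formatting path with a made-up tag set (the tag names and n_tags value are illustrative, not from the repo):

    n_tags = 3
    id_to_tag = {0: 'O', 1: 'PER', 2: 'LOC'}   # hypothetical tag inventory

    # As in the diff: one right-aligned 7-char column per tag,
    # bracketed by the ID/NE/Total columns and a trailing Percent column.
    format_str = "{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * n_tags)

    values = [id_to_tag[i] for i in range(n_tags)]
    print(format_str.format("ID", "NE", "Total", *values, "Percent"))
    # -> a right-aligned header row: ID, NE, Total, O, PER, LOC, Percent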