Skip to content

Commit 59bf09c

Browse files
committed
Bug fix
1 parent 4830efe commit 59bf09c

File tree

6 files changed

+28
-34
lines changed

6 files changed

+28
-34
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name='supar',
7-
version='1.1.1',
7+
version='1.1.2',
88
author='Yu Zhang',
99
author_email='yzhang.cs@outlook.com',
1010
description='Syntactic/Semantic Parsing Models',

supar/__init__.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
'LBPSemanticDependency',
3131
'MFVISemanticDependency']
3232

33-
__version__ = '1.1.1'
33+
__version__ = '1.1.2'
3434

3535
PARSER = {parser.NAME: parser for parser in [BiaffineDependencyParser,
3636
CRFDependencyParser,
@@ -51,6 +51,8 @@
5151
'biaffine-dep-roberta-en': 'ptb.biaffine.dep.roberta',
5252
'biaffine-dep-electra-zh': 'ctb7.biaffine.dep.electra',
5353
'biaffine-dep-xlmr': 'ud.biaffine.dep.xlmr',
54+
'mm-con-en': 'ptb.mm.con.lstm.char',
55+
# 'mm-con-zh': 'ctb7.mm.con.lstm.char',
5456
'crf-con-en': 'ptb.crf.con.lstm.char',
5557
'crf-con-zh': 'ctb7.crf.con.lstm.char',
5658
'crf-con-roberta-en': 'ptb.crf.con.roberta',
@@ -63,8 +65,7 @@
6365
'vi-sdp-roberta-en': 'dm.vi.sdp.roberta',
6466
'vi-sdp-electra-zh': 'semeval16.vi.sdp.electra'
6567
}
66-
MODEL = {n: f"{SRC['github']}/v1.1.0/{m}.zip" for n, m in NAME.items()}
67-
CONFIG = {n: f"{SRC['github']}/v1.1.0/{m}.ini" for n, m in NAME.items()}
68-
69-
MODEL['biaffine-sdp-en'] = f"{SRC['hlt']}/v1.1.1/{NAME['biaffine-sdp-en']}.zip"
70-
MODEL['biaffine-sdp-zh'] = f"{SRC['hlt']}/v1.1.1/{NAME['biaffine-sdp-zh']}.zip"
68+
MODEL = {n: f"{SRC['hlt']}/v1.1.0/{m}.zip" for n, m in NAME.items()}
69+
CONFIG = {n: f"{SRC['hlt']}/v1.1.0/{m}.ini" for n, m in NAME.items()}
70+
MODEL['biaffine-sdp-en'] = f"{SRC['hlt']}/v1.1.2/{NAME['biaffine-sdp-en']}.zip"
71+
MODEL['biaffine-sdp-zh'] = f"{SRC['hlt']}/v1.1.2/{NAME['biaffine-sdp-zh']}.zip"

supar/parsers/con.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,6 @@ class CRFConstituencyParser(Parser):
2828
def __init__(self, *args, **kwargs):
2929
super().__init__(*args, **kwargs)
3030

31-
if self.args.feat in ('char', 'bert'):
32-
self.WORD, self.FEAT = self.transform.WORD
33-
else:
34-
self.WORD, self.FEAT = self.transform.WORD, self.transform.POS
3531
self.TREE = self.transform.TREE
3632
self.CHART = self.transform.CHART
3733

supar/parsers/dep.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def _train(self, loader):
158158
bar, metric = progress_bar(loader), AttachmentMetric()
159159

160160
for i, batch in enumerate(bar, 1):
161-
words, *feats, arcs, rels = batch
161+
words, texts, *feats, arcs, rels = batch
162162
word_mask = words.ne(self.args.pad_index)
163163
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
164164
# ignore the first token of each sentence
@@ -178,7 +178,7 @@ def _train(self, loader):
178178
mask &= arcs.ge(0)
179179
# ignore all punctuation if not specified
180180
if not self.args.punct:
181-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
181+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
182182
metric(arc_preds, rel_preds, arcs, rels, mask)
183183
bar.set_postfix_str(f"lr: {self.scheduler.get_last_lr()[0]:.4e} - loss: {loss:.4f} - {metric}")
184184
logger.info(f"{bar.postfix}")
@@ -190,7 +190,7 @@ def _evaluate(self, loader):
190190
total_loss, metric = 0, AttachmentMetric()
191191

192192
for batch in loader:
193-
words, *feats, arcs, rels = batch
193+
words, texts, *feats, arcs, rels = batch
194194
word_mask = words.ne(self.args.pad_index)
195195
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
196196
# ignore the first token of each sentence
@@ -202,7 +202,7 @@ def _evaluate(self, loader):
202202
mask &= arcs.ge(0)
203203
# ignore all punctuation if not specified
204204
if not self.args.punct:
205-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
205+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
206206
total_loss += loss.item()
207207
metric(arc_preds, rel_preds, arcs, rels, mask)
208208
total_loss /= len(loader)
@@ -215,7 +215,7 @@ def _predict(self, loader):
215215

216216
preds = {'arcs': [], 'rels': [], 'probs': [] if self.args.prob else None}
217217
for batch in progress_bar(loader):
218-
words, *feats = batch
218+
words, texts, *feats = batch
219219
word_mask = words.ne(self.args.pad_index)
220220
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
221221
# ignore the first token of each sentence
@@ -470,7 +470,7 @@ def _train(self, loader):
470470
bar, metric = progress_bar(loader), AttachmentMetric()
471471

472472
for i, batch in enumerate(bar, 1):
473-
words, *feats, arcs, rels = batch
473+
words, texts, *feats, arcs, rels = batch
474474
word_mask = words.ne(self.args.pad_index)
475475
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
476476
# ignore the first token of each sentence
@@ -490,7 +490,7 @@ def _train(self, loader):
490490
mask &= arcs.ge(0)
491491
# ignore all punctuation if not specified
492492
if not self.args.punct:
493-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
493+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
494494
metric(arc_preds, rel_preds, arcs, rels, mask)
495495
bar.set_postfix_str(f"lr: {self.scheduler.get_last_lr()[0]:.4e} - loss: {loss:.4f} - {metric}")
496496
logger.info(f"{bar.postfix}")
@@ -502,7 +502,7 @@ def _evaluate(self, loader):
502502
total_loss, metric = 0, AttachmentMetric()
503503

504504
for batch in loader:
505-
words, *feats, arcs, rels = batch
505+
words, texts, *feats, arcs, rels = batch
506506
word_mask = words.ne(self.args.pad_index)
507507
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
508508
# ignore the first token of each sentence
@@ -514,7 +514,7 @@ def _evaluate(self, loader):
514514
mask &= arcs.ge(0)
515515
# ignore all punctuation if not specified
516516
if not self.args.punct:
517-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
517+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
518518
total_loss += loss.item()
519519
metric(arc_preds, rel_preds, arcs, rels, mask)
520520
total_loss /= len(loader)
@@ -527,7 +527,7 @@ def _predict(self, loader):
527527

528528
preds = {'arcs': [], 'rels': [], 'probs': [] if self.args.prob else None}
529529
for batch in progress_bar(loader):
530-
words, *feats = batch
530+
words, texts, *feats = batch
531531
word_mask = words.ne(self.args.pad_index)
532532
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
533533
# ignore the first token of each sentence
@@ -688,7 +688,7 @@ def _train(self, loader):
688688
bar, metric = progress_bar(loader), AttachmentMetric()
689689

690690
for i, batch in enumerate(bar, 1):
691-
words, *feats, arcs, sibs, rels = batch
691+
words, texts, *feats, arcs, sibs, rels = batch
692692
word_mask = words.ne(self.args.pad_index)
693693
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
694694
# ignore the first token of each sentence
@@ -708,7 +708,7 @@ def _train(self, loader):
708708
mask &= arcs.ge(0)
709709
# ignore all punctuation if not specified
710710
if not self.args.punct:
711-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
711+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
712712
metric(arc_preds, rel_preds, arcs, rels, mask)
713713
bar.set_postfix_str(f"lr: {self.scheduler.get_last_lr()[0]:.4e} - loss: {loss:.4f} - {metric}")
714714
logger.info(f"{bar.postfix}")
@@ -720,7 +720,7 @@ def _evaluate(self, loader):
720720
total_loss, metric = 0, AttachmentMetric()
721721

722722
for batch in loader:
723-
words, *feats, arcs, sibs, rels = batch
723+
words, texts, *feats, arcs, sibs, rels = batch
724724
word_mask = words.ne(self.args.pad_index)
725725
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
726726
# ignore the first token of each sentence
@@ -732,7 +732,7 @@ def _evaluate(self, loader):
732732
mask &= arcs.ge(0)
733733
# ignore all punctuation if not specified
734734
if not self.args.punct:
735-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
735+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
736736
total_loss += loss.item()
737737
metric(arc_preds, rel_preds, arcs, rels, mask)
738738
total_loss /= len(loader)
@@ -745,7 +745,7 @@ def _predict(self, loader):
745745

746746
preds = {'arcs': [], 'rels': [], 'probs': [] if self.args.prob else None}
747747
for batch in progress_bar(loader):
748-
words, *feats = batch
748+
words, texts, *feats = batch
749749
word_mask = words.ne(self.args.pad_index)
750750
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
751751
# ignore the first token of each sentence
@@ -995,7 +995,7 @@ def _train(self, loader):
995995
bar, metric = progress_bar(loader), AttachmentMetric()
996996

997997
for i, batch in enumerate(bar, 1):
998-
words, *feats, arcs, rels = batch
998+
words, texts, *feats, arcs, rels = batch
999999
word_mask = words.ne(self.args.pad_index)
10001000
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
10011001
# ignore the first token of each sentence
@@ -1015,7 +1015,7 @@ def _train(self, loader):
10151015
mask &= arcs.ge(0)
10161016
# ignore all punctuation if not specified
10171017
if not self.args.punct:
1018-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
1018+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
10191019
metric(arc_preds, rel_preds, arcs, rels, mask)
10201020
bar.set_postfix_str(f"lr: {self.scheduler.get_last_lr()[0]:.4e} - loss: {loss:.4f} - {metric}")
10211021
logger.info(f"{bar.postfix}")
@@ -1027,7 +1027,7 @@ def _evaluate(self, loader):
10271027
total_loss, metric = 0, AttachmentMetric()
10281028

10291029
for batch in loader:
1030-
words, *feats, arcs, rels = batch
1030+
words, texts, *feats, arcs, rels = batch
10311031
word_mask = words.ne(self.args.pad_index)
10321032
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
10331033
# ignore the first token of each sentence
@@ -1039,7 +1039,7 @@ def _evaluate(self, loader):
10391039
mask &= arcs.ge(0)
10401040
# ignore all punctuation if not specified
10411041
if not self.args.punct:
1042-
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in batch.sentences for w in s.words]))
1042+
mask.masked_scatter_(mask, ~mask.new_tensor([ispunct(w) for s in texts for w in s]))
10431043
total_loss += loss.item()
10441044
metric(arc_preds, rel_preds, arcs, rels, mask)
10451045
total_loss /= len(loader)
@@ -1052,7 +1052,7 @@ def _predict(self, loader):
10521052

10531053
preds = {'arcs': [], 'rels': [], 'probs': [] if self.args.prob else None}
10541054
for batch in progress_bar(loader):
1055-
words, *feats = batch
1055+
words, texts, *feats = batch
10561056
word_mask = words.ne(self.args.pad_index)
10571057
mask = word_mask if len(words.shape) < 3 else word_mask.any(-1)
10581058
# ignore the first token of each sentence

supar/parsers/sdp.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ class BiaffineSemanticDependencyParser(Parser):
2828
def __init__(self, *args, **kwargs):
2929
super().__init__(*args, **kwargs)
3030

31-
self.WORD, self.CHAR, self.ELMO, self.BERT = self.transform.FORM
3231
self.LEMMA = self.transform.LEMMA
3332
self.TAG = self.transform.POS
3433
self.LABEL = self.transform.PHEAD
@@ -301,7 +300,6 @@ class VISemanticDependencyParser(BiaffineSemanticDependencyParser):
301300
def __init__(self, *args, **kwargs):
302301
super().__init__(*args, **kwargs)
303302

304-
self.WORD, self.CHAR, self.ELMO, self.BERT = self.transform.FORM
305303
self.LEMMA = self.transform.LEMMA
306304
self.TAG = self.transform.POS
307305
self.LABEL = self.transform.PHEAD

supar/parsers/srl.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ class VISemanticRoleLabelingParser(Parser):
2727
def __init__(self, *args, **kwargs):
2828
super().__init__(*args, **kwargs)
2929

30-
self.WORD, self.CHAR, self.ELMO, self.BERT = self.transform.FORM
3130
self.LEMMA = self.transform.LEMMA
3231
self.TAG = self.transform.POS
3332
self.LABEL = self.transform.PHEAD

0 commit comments

Comments (0)