
Commit c9f8b13

Transform a tree into a single chart
1 parent 3a1dc5a commit c9f8b13

File tree: 3 files changed (+25, -44 lines)


supar/models/constituency.py

Lines changed: 7 additions & 10 deletions

@@ -206,17 +206,15 @@ def forward(self, words, feats):
 
         return s_span, s_label
 
-    def loss(self, s_span, s_label, spans, labels, mask, mbr=True):
+    def loss(self, s_span, s_label, charts, mask, mbr=True):
         r"""
         Args:
             s_span (~torch.Tensor): ``[batch_size, seq_len, seq_len]``.
                 Scores of all spans
             s_label (~torch.Tensor): ``[batch_size, seq_len, seq_len, n_labels]``.
                 Scores of all labels on each span.
-            spans (~torch.BoolTensor): ``[batch_size, seq_len, seq_len]``.
-                The tensor of gold-standard spans. ``True`` denotes there exist a span.
-            labels (~torch.LongTensor): ``[batch_size, seq_len, seq_len]``.
-                The tensor of gold-standard labels.
+            charts (~torch.LongTensor): ``[batch_size, seq_len, seq_len]``.
+                The tensor of gold-standard labels, in which positions without labels are filled with -1.
             mask (~torch.BoolTensor): ``[batch_size, seq_len, seq_len]``.
                 The mask for covering the unpadded tokens in each chart.
             mbr (bool):
@@ -228,9 +226,9 @@ def loss(self, s_span, s_label, spans, labels, mask, mbr=True):
                 original span scores of shape ``[batch_size, seq_len, seq_len]`` if ``mbr=False``, or marginals otherwise.
         """
 
-        span_mask = spans & mask
-        span_loss, span_probs = self.crf(s_span, mask, spans, mbr)
-        label_loss = self.criterion(s_label[span_mask], labels[span_mask])
+        span_mask = charts.ge(0) & mask
+        span_loss, span_probs = self.crf(s_span, mask, span_mask, mbr)
+        label_loss = self.criterion(s_label[span_mask], charts[span_mask])
         loss = span_loss + label_loss
 
         return loss, span_probs
@@ -252,5 +250,4 @@ def decode(self, s_span, s_label, mask):
 
         span_preds = cky(s_span, mask)
         label_preds = s_label.argmax(-1).tolist()
-        return [[(i, j, labels[i][j]) for i, j in spans]
-                for spans, labels in zip(span_preds, label_preds)]
+        return [[(i, j, labels[i][j]) for i, j in spans] for spans, labels in zip(span_preds, label_preds)]
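
The change above folds the two gold-standard tensors that loss used to take into a single charts tensor: a cell holds the label index of a gold constituent and -1 everywhere else, so charts.ge(0) plays the role of the old boolean spans tensor. A minimal sketch of that equivalence on made-up toy tensors (not taken from the repository):

    import torch

    # Hypothetical 1-sentence batch with seq_len = 4; -1 marks cells without a gold constituent.
    charts = torch.tensor([[[-1,  3, -1,  7],
                            [-1, -1,  5, -1],
                            [-1, -1, -1,  2],
                            [-1, -1, -1, -1]]])
    # A toy mask covering the strictly upper triangle, standing in for the real chart mask.
    mask = torch.ones_like(charts, dtype=torch.bool).triu_(1)

    # charts.ge(0) recovers what the removed `spans` tensor used to carry ...
    span_mask = charts.ge(0) & mask
    # ... and indexing with it selects what `labels[span_mask]` used to select.
    print(span_mask.nonzero().tolist())   # [[0, 0, 1], [0, 0, 3], [0, 1, 2], [0, 2, 3]]
    print(charts[span_mask].tolist())     # [3, 7, 5, 2]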

supar/parsers/crf_constituency.py

Lines changed: 4 additions & 4 deletions

@@ -133,15 +133,15 @@ def _train(self, loader):
 
         bar = progress_bar(loader)
 
-        for words, feats, trees, (spans, labels) in bar:
+        for words, feats, trees, charts in bar:
             self.optimizer.zero_grad()
 
             batch_size, seq_len = words.shape
             lens = words.ne(self.args.pad_index).sum(1) - 1
             mask = lens.new_tensor(range(seq_len - 1)) < lens.view(-1, 1, 1)
             mask = mask & mask.new_ones(seq_len-1, seq_len-1).triu_(1)
             s_span, s_label = self.model(words, feats)
-            loss, _ = self.model.loss(s_span, s_label, spans, labels, mask, self.args.mbr)
+            loss, _ = self.model.loss(s_span, s_label, charts, mask, self.args.mbr)
             loss.backward()
             nn.utils.clip_grad_norm_(self.model.parameters(), self.args.clip)
             self.optimizer.step()
@@ -155,13 +155,13 @@ def _evaluate(self, loader):
 
         total_loss, metric = 0, BracketMetric()
 
-        for words, feats, trees, (spans, labels) in loader:
+        for words, feats, trees, charts in loader:
             batch_size, seq_len = words.shape
             lens = words.ne(self.args.pad_index).sum(1) - 1
             mask = lens.new_tensor(range(seq_len - 1)) < lens.view(-1, 1, 1)
             mask = mask & mask.new_ones(seq_len-1, seq_len-1).triu_(1)
             s_span, s_label = self.model(words, feats)
-            loss, s_span = self.model.loss(s_span, s_label, spans, labels, mask, self.args.mbr)
+            loss, s_span = self.model.loss(s_span, s_label, charts, mask, self.args.mbr)
             chart_preds = self.model.decode(s_span, s_label, mask)
             # since the evaluation relies on terminals,
             # the tree should be first built and then factorized
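
Apart from unpacking charts instead of (spans, labels), these loops are unchanged; the chart mask they build keeps only the strictly upper triangle of each sentence's chart, up to that sentence's length. A toy sketch of that construction, assuming a batch of two sentences where seq_len is 4 and the computed lens come out as 3 and 2 (made-up values, not from the repository):

    import torch

    seq_len = 4
    lens = torch.tensor([3, 2])   # stands in for words.ne(self.args.pad_index).sum(1) - 1

    # Keep fencepost positions below each sentence's length ...
    mask = lens.new_tensor(range(seq_len - 1)) < lens.view(-1, 1, 1)
    # ... and only spans (i, j) with i < j, i.e. the strictly upper triangle.
    mask = mask & mask.new_ones(seq_len - 1, seq_len - 1).triu_(1)

    print(mask[0].tolist())   # [[False, True, True], [False, False, True], [False, False, False]]
    print(mask[1].tolist())   # [[False, True, False], [False, False, False], [False, False, False]]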

supar/utils/field.py

Lines changed: 14 additions & 30 deletions

@@ -321,26 +321,18 @@ class ChartField(Field):
     Field dealing with constituency trees.
 
     This field receives sequences of binarized trees factorized in pre-order,
-    and returns two tensors representing the bracketing trees and labels on each constituent respectively.
+    and returns charts filled with labels on each constituent.
 
     Examples:
         >>> sequence = [(0, 5, 'S'), (0, 4, 'S|<>'), (0, 1, 'NP'), (1, 4, 'VP'), (1, 2, 'VP|<>'),
                         (2, 4, 'S+VP'), (2, 3, 'VP|<>'), (3, 4, 'NP'), (4, 5, 'S|<>')]
-        >>> spans, labels = field.transform([sequence])[0]  # this example field is built from ptb
-        >>> spans
-        tensor([[False,  True, False, False,  True,  True],
-                [False, False,  True, False,  True, False],
-                [False, False, False,  True,  True, False],
-                [False, False, False, False,  True, False],
-                [False, False, False, False, False,  True],
-                [False, False, False, False, False, False]])
-        >>> labels
-        tensor([[  0,  37,   0,   0, 107,  79],
-                [  0,   0, 120,   0, 112,   0],
-                [  0,   0,   0, 120,  86,   0],
-                [  0,   0,   0,   0,  37,   0],
-                [  0,   0,   0,   0,   0, 107],
-                [  0,   0,   0,   0,   0,   0]])
+        >>> field.transform([sequence])[0]
+        tensor([[ -1,  37,  -1,  -1, 107,  79],
+                [ -1,  -1, 120,  -1, 112,  -1],
+                [ -1,  -1,  -1, 120,  86,  -1],
+                [ -1,  -1,  -1,  -1,  37,  -1],
+                [ -1,  -1,  -1,  -1,  -1, 107],
+                [ -1,  -1,  -1,  -1,  -1,  -1]])
     """
 
     def build(self, dataset, min_freq=1):
@@ -351,20 +343,12 @@ def build(self, dataset, min_freq=1):
         self.vocab = Vocab(counter, min_freq, self.specials, self.unk_index)
 
     def transform(self, sequences):
-        sequences = [self.preprocess(seq) for seq in sequences]
-        spans, labels = [], []
-
+        charts = []
         for sequence in sequences:
+            sequence = self.preprocess(sequence)
             seq_len = sequence[0][1] + 1
-            span_chart = torch.full((seq_len, seq_len), self.pad_index, dtype=torch.bool)
-            label_chart = torch.full((seq_len, seq_len), self.pad_index, dtype=torch.long)
+            chart = torch.full((seq_len, seq_len), -1, dtype=torch.long)
             for i, j, label in sequence:
-                span_chart[i, j] = 1
-                label_chart[i, j] = self.vocab[label]
-            spans.append(span_chart)
-            labels.append(label_chart)
-
-        return list(zip(spans, labels))
-
-    def compose(self, sequences):
-        return [pad(i).to(self.device) for i in zip(*sequences)]
+                chart[i, j] = self.vocab[label]
+            charts.append(chart)
+        return charts
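
With one chart per sentence, the compose override that zipped and padded two tensors is no longer needed (batching is presumably left to the parent Field). The sketch below replays the new transform logic standalone; the label-to-index mapping is made up to match the ids shown in the docstring example rather than taken from a vocab actually built on PTB:

    import torch

    # Made-up mapping chosen to reproduce the docstring example above.
    vocab = {'NP': 37, 'S': 79, 'S+VP': 86, 'S|<>': 107, 'VP': 112, 'VP|<>': 120}

    sequence = [(0, 5, 'S'), (0, 4, 'S|<>'), (0, 1, 'NP'), (1, 4, 'VP'), (1, 2, 'VP|<>'),
                (2, 4, 'S+VP'), (2, 3, 'VP|<>'), (3, 4, 'NP'), (4, 5, 'S|<>')]

    seq_len = sequence[0][1] + 1                                  # the first (widest) span covers the whole sentence
    chart = torch.full((seq_len, seq_len), -1, dtype=torch.long)  # -1 means "no constituent here"
    for i, j, label in sequence:
        chart[i, j] = vocab[label]

    print(chart)         # matches the tensor in the Examples block
    print(chart.ge(0))   # downstream, this recovers the old boolean span chart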
