Skip to content

Commit ea33c78

Browse files
committed
corefud.Delete misc=1
The parameters (coref|empty|misc) turn on/off the deletion of the respective parts of the CoNLL-U data.
1 parent f2987e6 commit ea33c78

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

udapi/block/corefud/delete.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,16 @@
66

77
class Delete(Block):
88

9-
def __init__(self, empty=False, **kwargs):
9+
def __init__(self, coref=True, empty=False, misc=False, **kwargs):
10+
"""Args:
11+
coref: delete coreference attributes in MISC, i.e. Entity, Bridge and SplitAnte
12+
empty: delete all empty nodes and references to them (from DEPS and MISC[Functor])
13+
misc: delete all attributes in MISC except for SpaceAfter
14+
"""
1015
super().__init__(**kwargs)
16+
self.coref = coref
1117
self.empty = empty
18+
self.misc = misc
1219

1320
def is_root_reachable_by_deps(self, node, parents_to_ignore=None):
1421
""" Check if the root node is reachable from node, possibly after deleting the parents_to_ignore nodes.
@@ -54,7 +61,8 @@ def _deps_ignore_nodes(self, node, parents_to_ignore):
5461

5562
def process_document(self, doc):
5663
# This block should work both with coreference loaded (deserialized) and not.
57-
doc._eid_to_entity = None
64+
if self.coref:
65+
doc._eid_to_entity = None
5866
for root in doc.trees:
5967
if self.empty:
6068
for node in root.descendants:
@@ -74,7 +82,12 @@ def process_document(self, doc):
7482
del node.misc['Functor']
7583
root.empty_nodes = []
7684

77-
for node in root.descendants + root.empty_nodes:
78-
node._mentions = []
79-
for attr in ('Entity', 'Bridge', 'SplitAnte'):
80-
del node.misc[attr]
85+
if self.coref or self.misc:
86+
for node in root.descendants + root.empty_nodes:
87+
if self.misc:
88+
node.misc = 'SpaceAfter=No' if node.no_space_after else None
89+
if self.coref:
90+
node._mentions = []
91+
if not self.misc:
92+
for attr in ('Entity', 'Bridge', 'SplitAnte'):
93+
del node.misc[attr]

0 commit comments

Comments
 (0)