6
6
7
7
class Delete (Block ):
8
8
9
def __init__(self, coref=True, empty=False, misc=False, **kwargs):
    """Configure which parts of the annotation this block deletes.

    Args:
        coref: delete coreference attributes in MISC, i.e. Entity, Bridge and SplitAnte
        empty: delete all empty nodes and references to them (from DEPS and MISC[Functor])
        misc: delete all attributes in MISC except for SpaceAfter
        **kwargs: forwarded unchanged to the parent constructor
    """
    super().__init__(**kwargs)
    # Plain flags; they are only read later when the document is processed.
    self.coref, self.empty, self.misc = coref, empty, misc
12
19
13
20
def is_root_reachable_by_deps (self , node , parents_to_ignore = None ):
14
21
""" Check if the root node is reachable from node, possibly after deleting the parents_to_ignore nodes.
@@ -54,7 +61,8 @@ def _deps_ignore_nodes(self, node, parents_to_ignore):
54
61
55
62
def process_document (self , doc ):
56
63
# This block should work both with coreference loaded (deserialized) and not.
57
- doc ._eid_to_entity = None
64
+ if self .coref :
65
+ doc ._eid_to_entity = None
58
66
for root in doc .trees :
59
67
if self .empty :
60
68
for node in root .descendants :
@@ -74,7 +82,12 @@ def process_document(self, doc):
74
82
del node .misc ['Functor' ]
75
83
root .empty_nodes = []
76
84
77
- for node in root .descendants + root .empty_nodes :
78
- node ._mentions = []
79
- for attr in ('Entity' , 'Bridge' , 'SplitAnte' ):
80
- del node .misc [attr ]
85
+ if self .coref or self .misc :
86
+ for node in root .descendants + root .empty_nodes :
87
+ if self .misc :
88
+ node .misc = 'SpaceAfter=No' if node .no_space_after else None
89
+ if self .coref :
90
+ node ._mentions = []
91
+ if not self .misc :
92
+ for attr in ('Entity' , 'Bridge' , 'SplitAnte' ):
93
+ del node .misc [attr ]
0 commit comments