Skip to content

Commit abd2209

Browse files
committed
better corefud.MoveHead: fewer warnings, possibility to store all bugs in misc['Bug']
1 parent 07ff9d5 commit abd2209

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

udapi/block/corefud/movehead.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@
66
class MoveHead(Block):
77
"""Block corefud.MoveHead moves the head to the highest node in each mention."""
88

9-
def __init__(self, nontreelet='fix', **kwargs):
9+
def __init__(self, bugs='warn', **kwargs):
1010
self.counter = Counter()
11-
self.nontreelet = nontreelet
11+
self.bugs = bugs
1212
super().__init__(**kwargs)
1313

1414
def find_head(self, mention):
@@ -19,25 +19,34 @@ def find_head(self, mention):
1919
self.counter['with_empty'] += 1
2020
for empty_node in empty_nodes:
2121
parents = [d['parent'] for d in empty_node.deps if not d['parent'].is_empty()]
22-
if parents and parents[0] not in non_empty:
23-
non_empty.append(parents[0])
22+
if parents:
23+
if parents[0] not in non_empty:
24+
non_empty.append(parents[0])
2425
else:
2526
# TODO we should climb up, but preventing cycles
2627
# We could also introduce empty_node.nonempty_ancestor
27-
logging.warning(f"could not find non-empty parent of {empty_node} for mention {mention.head}")
28+
if 'warn' in self.bugs:
29+
logging.warning(f"could not find non-empty parent of {empty_node} for mention {mention.head}")
30+
if 'mark' in self.bugs:
31+
node.misc['Bug'] = 'no-parent-of-empty'
2832
non_empty.sort()
2933

3034
(highest, added_nodes) = find_minimal_common_treelet(*non_empty)
3135
if highest in mention.words:
3236
return highest, 'treelet'
3337

34-
if 'warn' in self.nontreelet:
38+
if 'warn' in self.bugs:
3539
logging.warning(f"Non-treelet mention in {mention.head} (nearest common antecedent={highest})")
36-
if 'mark' in self.nontreelet:
37-
node.misc['Mark'] = 'non-treelet-mention'
40+
if 'mark' in self.bugs:
41+
mention.head.misc['Bug'] = 'non-treelet-mention'
3842
for word in mention.words:
3943
if not word.is_empty() and word.parent not in non_empty:
4044
return word, 'nontreelet'
45+
46+
if 'warn' in self.bugs:
47+
logging.warning(f"Strange mention {mention.head} (nearest common antecedent={highest})")
48+
if 'mark' in self.bugs:
49+
mention.head.misc['Bug'] = 'strange-mention'
4150
return mention.head, 'bug'
4251

4352
def process_document(self, doc):

0 commit comments

Comments
 (0)