Skip to content

Commit 267e5fa

Browse files
committed
Portuguese "no" can be a pronoun as well
fixes udapi#85
1 parent af98011 commit 267e5fa

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

udapi/block/ud/pt/addmwt.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@
3939
'nisso': {'form': 'em isso', 'lemma': 'em este'},
4040
'nisto': {'form': 'em isto', 'lemma': 'em este',
4141
'upos': 'ADP PRON', 'main': 1, 'shape': 'subtree'},
42-
'no': {'form': 'em o', 'lemma': 'em o'},
43-
'nos': {'form': 'em os', 'lemma': 'em o'},
42+
'no': {'form': 'em o', 'lemma': 'em o'}, # PRON cases are excluded below
43+
'nos': {'form': 'em os', 'lemma': 'em o'}, # PRON cases are excluded below
4444
'num': {'form': 'em um', 'lemma': 'em um'},
4545
'numa': {'form': 'em uma', 'lemma': 'em um'},
4646
'numas': {'form': 'em umas', 'lemma': 'em um'},
@@ -79,6 +79,11 @@ class AddMwt(udapi.block.ud.addmwt.AddMwt):
7979

8080
def multiword_analysis(self, node):
8181
"""Return a dict with MWT info or None if `node` does not represent a multiword token."""
82+
83+
# "no"/"nos" can be either a contraction of "em o"/"em os", or a pronoun
84+
if node.form.lower() in ('no', 'nos') and node.upos == 'PRON':
85+
return
86+
8287
analysis = MWTS.get(node.form.lower(), None)
8388

8489
# If the input is e.g.:

0 commit comments

Comments
 (0)