diff --git a/udapi/block/ud/cs/addmwt.py b/udapi/block/ud/cs/addmwt.py index 7f3a4283..c1b3783a 100644 --- a/udapi/block/ud/cs/addmwt.py +++ b/udapi/block/ud/cs/addmwt.py @@ -126,7 +126,7 @@ def multiword_analysis(self, node): # could be masculine or neuter. We pick Gender=Masc and Animacy=Anim # by default, unless the original token was annotated as Animacy=Inan # or Gender=Neut. - m = re.match(r"^(na|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower()) + m = re.match(r"^(na|nade|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower()) if m: node.misc['AddMwt'] = '' # Remove vocalization from 'přěde' (přěd něj) but keep it in 'skrze' diff --git a/udapi/block/ud/cs/markfeatsbugs.py b/udapi/block/ud/cs/markfeatsbugs.py index cbf57605..6c3672ac 100644 --- a/udapi/block/ud/cs/markfeatsbugs.py +++ b/udapi/block/ud/cs/markfeatsbugs.py @@ -37,7 +37,8 @@ def process_node(self, node): 'Gender': ['Neut'], 'Number': ['Sing', 'Dual', 'Plur'], 'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'], - 'Foreign': ['Yes'] + 'Foreign': ['Yes'], + 'Abbr': ['Yes'] }) elif node.feats['Gender'] == 'Masc': self.check_required_features(node, ['Animacy']) @@ -46,13 +47,15 @@ def process_node(self, node): 'Animacy': ['Anim', 'Inan'], 'Number': ['Sing', 'Dual', 'Plur'], 'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'], - 'Foreign': ['Yes']}) + 'Foreign': ['Yes'], + 'Abbr': ['Yes']}) else: self.check_allowed_features(node, { 'Gender': ['Masc', 'Fem', 'Neut'], 'Number': ['Sing', 'Dual', 'Plur'], 'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'], - 'Foreign': ['Yes']}) + 'Foreign': ['Yes'], + 'Abbr': ['Yes']}) # PROPER NOUNS ######################################################### elif node.upos == 'PROPN': self.check_required_features(node, ['Gender', 'Number', 'Case']) @@ -64,14 +67,16 @@ def process_node(self, node): 'Number': ['Sing', 'Dual', 'Plur'], 'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'], 'NameType': ['Giv', 'Sur', 'Geo', 'Nat'], - 'Foreign': ['Yes']}) + 'Foreign': ['Yes'], + 'Abbr': ['Yes']}) else: self.check_allowed_features(node, { 'Gender': ['Masc', 'Fem', 'Neut'], 'Number': ['Sing', 'Dual', 'Plur'], 'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'], 'NameType': ['Giv', 'Sur', 'Geo', 'Nat'], - 'Foreign': ['Yes']}) + 'Foreign': ['Yes'], + 'Abbr': ['Yes']}) # ADJECTIVES ########################################################### elif node.upos == 'ADJ': if node.feats['Poss'] == 'Yes': # possessive adjectives diff --git a/udapi/block/ud/fixadvmodbyupos.py b/udapi/block/ud/fixadvmodbyupos.py index e7aa2fed..916910b5 100644 --- a/udapi/block/ud/fixadvmodbyupos.py +++ b/udapi/block/ud/fixadvmodbyupos.py @@ -51,6 +51,8 @@ def process_node(self, node): elif node.udeprel == 'mark': if node.upos in ['PRON', 'DET']: node.deprel = 'nsubj' # it could be also obj, iobj, obl or nmod; just guessing what might be more probable + elif node.upos == 'NOUN': + node.deprel = 'obl' elif node.upos == 'INTJ': node.deprel = 'discourse' elif node.udeprel == 'cc': @@ -71,6 +73,8 @@ def process_node(self, node): node.deprel = 'aux' elif node.upos == 'VERB': node.deprel = 'dep' + elif node.upos == 'SCONJ': + node.deprel = 'mark' elif node.upos == 'X': node.deprel = 'dep' elif node.udeprel == 'nummod':