Skip to content

Commit 7459d35

Browse files
committed
Updated processing of Old Czech data.
1 parent 0abec00 commit 7459d35

File tree

3 files changed

+15
-6
lines changed

3 files changed

+15
-6
lines changed

udapi/block/ud/cs/addmwt.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ def multiword_analysis(self, node):
126126
# could be masculine or neuter. We pick Gender=Masc and Animacy=Anim
127127
# by default, unless the original token was annotated as Animacy=Inan
128128
# or Gender=Neut.
129-
m = re.match(r"^(na|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
129+
m = re.match(r"^(na|nade|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
130130
if m:
131131
node.misc['AddMwt'] = ''
132132
# Remove vocalization from 'přěde' (přěd něj) but keep it in 'skrze'

udapi/block/ud/cs/markfeatsbugs.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ def process_node(self, node):
3737
'Gender': ['Neut'],
3838
'Number': ['Sing', 'Dual', 'Plur'],
3939
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
40-
'Foreign': ['Yes']
40+
'Foreign': ['Yes'],
41+
'Abbr': ['Yes']
4142
})
4243
elif node.feats['Gender'] == 'Masc':
4344
self.check_required_features(node, ['Animacy'])
@@ -46,13 +47,15 @@ def process_node(self, node):
4647
'Animacy': ['Anim', 'Inan'],
4748
'Number': ['Sing', 'Dual', 'Plur'],
4849
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
49-
'Foreign': ['Yes']})
50+
'Foreign': ['Yes'],
51+
'Abbr': ['Yes']})
5052
else:
5153
self.check_allowed_features(node, {
5254
'Gender': ['Masc', 'Fem', 'Neut'],
5355
'Number': ['Sing', 'Dual', 'Plur'],
5456
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
55-
'Foreign': ['Yes']})
57+
'Foreign': ['Yes'],
58+
'Abbr': ['Yes']})
5659
# PROPER NOUNS #########################################################
5760
elif node.upos == 'PROPN':
5861
self.check_required_features(node, ['Gender', 'Number', 'Case'])
@@ -64,14 +67,16 @@ def process_node(self, node):
6467
'Number': ['Sing', 'Dual', 'Plur'],
6568
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
6669
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
67-
'Foreign': ['Yes']})
70+
'Foreign': ['Yes'],
71+
'Abbr': ['Yes']})
6872
else:
6973
self.check_allowed_features(node, {
7074
'Gender': ['Masc', 'Fem', 'Neut'],
7175
'Number': ['Sing', 'Dual', 'Plur'],
7276
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
7377
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
74-
'Foreign': ['Yes']})
78+
'Foreign': ['Yes'],
79+
'Abbr': ['Yes']})
7580
# ADJECTIVES ###########################################################
7681
elif node.upos == 'ADJ':
7782
if node.feats['Poss'] == 'Yes': # possessive adjectives

udapi/block/ud/fixadvmodbyupos.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ def process_node(self, node):
5151
elif node.udeprel == 'mark':
5252
if node.upos in ['PRON', 'DET']:
5353
node.deprel = 'nsubj' # it could be also obj, iobj, obl or nmod; just guessing what might be more probable
54+
elif node.upos == 'NOUN':
55+
node.deprel = 'obl'
5456
elif node.upos == 'INTJ':
5557
node.deprel = 'discourse'
5658
elif node.udeprel == 'cc':
@@ -71,6 +73,8 @@ def process_node(self, node):
7173
node.deprel = 'aux'
7274
elif node.upos == 'VERB':
7375
node.deprel = 'dep'
76+
elif node.upos == 'SCONJ':
77+
node.deprel = 'mark'
7478
elif node.upos == 'X':
7579
node.deprel = 'dep'
7680
elif node.udeprel == 'nummod':

0 commit comments

Comments
 (0)