diff --git a/udapi/block/msf/phrase.py b/udapi/block/msf/phrase.py index 6c5c587a..c92e5e91 100644 --- a/udapi/block/msf/phrase.py +++ b/udapi/block/msf/phrase.py @@ -59,6 +59,8 @@ def get_polarity(self, node, neg): return 'Neg' def get_is_reflex(self,node,refl): + if node.feats['Voice'] == 'Mid': + return 'Yes' if len(refl) == 0: return node.feats['Reflex'] return 'Yes' diff --git a/udapi/block/msf/slavic/past.py b/udapi/block/msf/slavic/past.py index 34dcd354..d4f3c7cd 100644 --- a/udapi/block/msf/slavic/past.py +++ b/udapi/block/msf/slavic/past.py @@ -39,7 +39,7 @@ def process_node(self, node): person=node.feats['Person'], number=node.feats['Number'], mood=node.feats['Mood'], - voice='Pass', + voice='Act', # In Polish, impersonal statements are annotated with Voice=Act. In Ukrainian, the Voice feature is missing; therefore, we decided to annotate these phrases with PhraseVoice=Act aspect=node.feats['Aspect'], form=node.feats['VerbForm'], polarity=self.get_polarity(node,neg), diff --git a/udapi/block/msf/slavic/preprocessor.py b/udapi/block/msf/slavic/preprocessor.py index 5d090a2c..e9a5e90b 100644 --- a/udapi/block/msf/slavic/preprocessor.py +++ b/udapi/block/msf/slavic/preprocessor.py @@ -31,8 +31,7 @@ def process_node(self,node): # In Ukrainian, there is no explicit annotation of reflexive verbs # We decided to unify the annotation of reflexive verbs with Russian and Belarusian, where reflexive verbs are formed similarly # We add the feature Voice=Mid to reflexive verbs - # This feature is added only to Ukrainian data (for example, there are some verbs in Old Church Slavonic that end in 'сь' but are not reflexive) - if node.upos == 'VERB' and (node.form.endswith('сь') or node.form.endswith('ся')) and self.lang == 'uk': + if node.upos == 'VERB' and (node.lemma.endswith('сь') or node.lemma.endswith('ся')): node.feats['Voice'] = 'Mid' # makedonstina tvori budouci cas pomoci pomocneho slova ќе, u nejz neni nijak vyznaceno, ze se podili na tvorbe budouciho casu @@
-72,5 +71,9 @@ def process_node(self,node): if node.feats['Mood'] == 'Sub': node.feats['Mood'] = 'Cnd' + # Although infinitives in Old Church Slavonic are annotated with Tense=Pres, they do not convey tense; therefore, we remove this annotation. NOTE(review): no language check is visible on the condition below, so Tense appears to be cleared on every infinitive - confirm this is intended for all languages + if node.feats['VerbForm'] == 'Inf': + node.feats['Tense'] = '' + # TODO maybe we want to set Tense=Fut for the perfective verbs with Tense=Pres? This could solve the problem with the simplified detection of the future tense in Czech # but there are many verbs with no Aspect value, so the problem is still there