diff --git a/demo/en-sample.conllu b/demo/en-sample.conllu index 3861e629..3e7e533b 100644 --- a/demo/en-sample.conllu +++ b/demo/en-sample.conllu @@ -979,3 +979,21 @@ 14 ever ever ADV RB _ 12 nmod _ SpaceAfter=No 15 . . PUNCT . _ 4 punct _ _ +1 It it PRON PRP Case=Nom|Gender=Neut|Number=Sing|Person=3|PronType=Prs 3 nsubjpass _ _ +2 is be AUX VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 3 auxpass _ _ +3 caused cause VERB VBN Tense=Past|VerbForm=Part|Voice=Pass 0 root _ _ +4 by by ADP IN _ 8 case _ _ +5 social social ADJ JJ Degree=Pos 8 amod _ _ +6 and and CONJ CC _ 5 cc _ _ +7 economical economical ADJ JJ Degree=Pos 5 conj _ _ +8 factors factor NOUN NNS Number=Plur 3 nmod _ _ +9 . . PUNCT . _ 3 punct _ SpaceAfter=No + +1 The the DET DT Definite=Def|PronType=Art 3 det _ _ +2 strict strict ADJ JJ Degree=Pos 3 amod _ _ +3 judge judge NOUN NN Number=Sing 4 nsubj _ _ +4 left leave VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root _ SpaceAfter=No +5 . . PUNCT . _ 4 punct _ SpaceAfter=No + + + diff --git a/udapi/block/write/textmodetrees.py b/udapi/block/write/textmodetrees.py index 3c9f7308..f06e4b33 100644 --- a/udapi/block/write/textmodetrees.py +++ b/udapi/block/write/textmodetrees.py @@ -2,8 +2,8 @@ import re import sys -import colorama -from termcolor import colored +# import colorama +# from termcolor import colored from udapi.core.basewriter import BaseWriter COLOR_OF = { @@ -238,8 +238,8 @@ def before_process_document(self, document): super().before_process_document(document) if self.color == 'auto': self.color = sys.stdout.isatty() - if self.color: - colorama.init() + # if self.color: + # colorama.init() if self.print_doc_meta: for key, value in sorted(document.meta.items()): print('%s = %s' % (key, value)) @@ -263,14 +263,14 @@ def is_marked(self, node): """Should a given node be highlighted?""" return self.mark_re.search(str(node.misc)) if self.mark_re is not None else False - def colorize_comment(self, comment): + # def colorize_comment(self, comment): """Return a string with color markup for a given comment.""" - if self.mark_re is None: - return comment - return self.mark_re.sub(colored(r'\g<0>', None, None, ['reverse', 'bold']), comment) - - @staticmethod - def colorize_attr(attr, value, marked): - """Return a string with color markup for a given attr and its value.""" - color = COLOR_OF.get(attr, None) - return colored(value, color, None, ['reverse', 'bold'] if marked else None) + # if self.mark_re is None: + # return comment + #return self.mark_re.sub(colored(r'\g<0>', None, None, ['reverse', 'bold']), comment) + + # @staticmethod + #def colorize_attr(attr, value, marked): + # """Return a string with color markup for a given attr and its value.""" + # color = COLOR_OF.get(attr, None) + # return colored(value, color, None, ['reverse', 'bold'] if marked else None) diff --git a/udapi/block/zellig_harris/common.py b/udapi/block/zellig_harris/common.py index 46845c15..ba714e5e 100644 --- a/udapi/block/zellig_harris/common.py +++ b/udapi/block/zellig_harris/common.py @@ -27,4 +27,4 @@ def print_triple(node_a, relation_name, node_b, print_lemma=False): node_b = get_node_representation(node_b, print_lemma=print_lemma) context = u"%s %s_%s" % (node_a, relation_name, node_b) - print(context) + return context diff --git a/udapi/block/zellig_harris/configurations.py b/udapi/block/zellig_harris/configurations.py index c786150e..6b2a97da 100644 --- a/udapi/block/zellig_harris/configurations.py +++ b/udapi/block/zellig_harris/configurations.py @@ -12,35 +12,32 @@ class Configurations(Block): """ - def __init__(self, args=None): + def __init__(self, pos=None, print_lemmas=None, verbose=None): """ Initialization. :param args: A dict of optional parameters. """ - if args is None: - args = {} - # Call the constructor of the parent object. - super(Configurations, self).__init__(args) + super(Configurations, self).__init__() # Process the 'POS' argument. self.pos = [] - if 'pos' in args: - self.pos = args['pos'].split(',') + if pos is not None: + self.pos = pos.split(',') # Process the 'print_lemmas' argument. self.print_lemmas = False - if 'print_lemmas' in args and args['print_lemmas'] == '1': + if print_lemmas is not None and print_lemmas == 1: self.print_lemmas = True # Process the 'print_lemmas' argument. self.verbose = False - if 'verbose' in args and args['verbose'] == '1': + if verbose is not None and verbose == 1: self.verbose = True - def apply_query(self, query_id, node): + def apply_query(self, query_id, node, dictionary_param_1,dictionary_param_2, der_halucinate_param): """ A generic method for applying a specified query on a specified node. @@ -60,7 +57,7 @@ def apply_query(self, query_id, node): triples = [] try: - triples = method(node) + triples = method(node, dictionary_param_1,dictionary_param_2, der_halucinate_param) except ValueError as exception: if self.verbose: logging.info(' - no configurations: %s', exception) @@ -70,8 +67,12 @@ def apply_query(self, query_id, node): logging.info(' - no configurations, but all conditions passed.') for (node_a, relation_name, node_b) in triples: - print_triple(node_a, relation_name, node_b, - print_lemma=self.print_lemmas) + triple = print_triple(node_a, relation_name, node_b, print_lemma=self.print_lemmas) + + if self.verbose: + logging.info(' - %s', triple) + + print(triple) def process_tree(self, tree): """ @@ -83,7 +84,7 @@ def process_tree(self, tree): if self.verbose: logging.info('') logging.info('---') - logging.info('Sentence ID : %s', tree.sent_id) + logging.info('Sentence ID : %s', tree.address()) logging.info('Sentence : %s', ' '.join([node.form for node in tree.descendants()])) logging.info('---') diff --git a/udapi/block/zellig_harris/derinet.py b/udapi/block/zellig_harris/derinet.py new file mode 100644 index 00000000..f2291683 --- /dev/null +++ b/udapi/block/zellig_harris/derinet.py @@ -0,0 +1,382 @@ +class Derivation(object): + def __init__(self, sub_word, part, depth): + self.sub_word = sub_word + self.part = part + self.depth = depth + + +class Word(object): + def __init__(self, lemma,sub_words,derivation): + self.lemma = lemma + self.sub_words = sub_words + self.derivation = derivation + + +class WtypeOfD(object): + def __init__(self, lemma,type_of_deriv): + self.lemma = lemma + self.type_of_deriv = type_of_deriv + + def set_type_of_deriv(self, value): + self.type_of_deriv = value + + + +class Derinet(object): + + words=[] + dict_noun_key_verb_value= {} + dict_verb_key_noun_value = {} + dict_adj_key_adv_value = {} + dict_adv_key_adj_value = {} + dict_adj_key_verb_value = {} + dict_verb_key_adj_value = {} + dict_noun_key_adj_value = {} + dict_adj_key_noun_value = {} + dict_adj_key_adj_value = {} + dict_verb_key_verb_value = {} + dict_noun_key_noun_value = {} + + + #7\aback\59\C\\1\N\N\N\N\Y\a+back\xN\AS\N\N\N\#\N\N\AS\((a)[B|.N],(back)[N])[B]\N\N\N + #24099\irritant\5\C\\1\N\N\N\N\Y\irritate+ant\2x\SA\N\Y\N\-ate#\N\N\SA\((irritate)[V],(ant)[A|V.])[A]\N\Y\N + #24087\irreverently\7\C\\1\N\N\N\N\Y\irreverent+ly\Ax\SA\N\N\N\#\N\N\ASAA\(((ir)[A|.A],((revere)[V],(ent)[A|V.])[A])[A],(ly)[B|A.])[B]\N\N\N + + def __init__(self): + f1 = open("derivations.txt",'w') + with open("eml.cd") as f: + for line in f: + arr = line.split('\\') + lemma = arr[1] + ws = lemma.split(' ') + if ws.__len__() == 1: + derword = arr[11] + sub_words = derword.split('+') + deriving = arr[21] + depth = 0 + deriv = [] + type = False + if deriving.__len__() > 0: + f1.write("\n") + for z in deriving: + if (z == '('): + s = [] + depth = depth + 1 + elif (z == '['): + t = [] + type = True + elif (z == ']'): + type = False + d = Derivation(''.join(s), ''.join(t), depth) + f1.write(''.join(s)+" ") + f1.write(''.join(t)+" ") + f1.write(str(depth)+" ") + s = [] + deriv.append(d) + elif ((z >= 'a') and (z <= 'z')) or ((z >= 'A') and (z <= 'Z') or (z == '.') or (z == '|')): + if not type: + l = z + s.append(l) + else: + l = z + t.append(l) + elif z == ')': + depth = depth - 1 + w = Word(lemma, sub_words, deriv) + self.words.append(w) + f.close() + f1.close() + + for w in self.words: + wc=w.derivation[-1].part + if len(w.sub_words) == 1: + if len(w.derivation)>=2 and ((w.derivation[-2].sub_word) == '' or (w.derivation[-2].sub_word == w.sub_words[0])): + derived_word=w.sub_words[0] + if (w.derivation[-2].part == 'V') and (wc == 'N'): + self.add_word_to_dictionary(self.dict_noun_key_verb_value, self.dict_verb_key_noun_value, w.lemma, derived_word, 'pos_conversion') + + elif (w.derivation[-2].part == 'N') and (wc == 'V'): + self.add_word_to_dictionary(self.dict_verb_key_noun_value, self.dict_noun_key_verb_value, w.lemma, derived_word, 'pos_conversion') + + elif (w.derivation[-2].part == 'A') and (wc == 'B'): + self.add_word_to_dictionary(self.dict_adv_key_adj_value, self.dict_adj_key_adv_value, w.lemma, derived_word, 'pos_conversion') + + elif (w.derivation[-2].part == 'N') and (wc == 'A'): + self.add_word_to_dictionary(self.dict_adj_key_noun_value, self.dict_noun_key_adj_value, w.lemma, derived_word, 'pos_conversion') + + elif (w.derivation[-2].part == 'V') and (wc == 'A'): + self.add_word_to_dictionary(self.dict_adj_key_verb_value, self.dict_verb_key_adj_value, w.lemma, derived_word, 'pos_conversion') + + + for d in w.derivation: + if w.sub_words[-1] == d.sub_word: + dp = d.part.split('|') + if len(dp) > 1: + derived_word = ''.join(w.sub_words[:-1]) + if dp[1] == 'A.' and wc =='V': + self.add_word_to_dictionary(self.dict_verb_key_adj_value, self.dict_adj_key_verb_value, w.lemma, derived_word, 'suffix') + + if dp[1] == 'V.' and wc == 'N': + self.add_word_to_dictionary(self.dict_noun_key_verb_value, self.dict_verb_key_noun_value, w.lemma, derived_word, 'suffix') + + elif dp[1] == 'N.' and wc == 'V': + self.add_word_to_dictionary(self.dict_verb_key_noun_value, self.dict_noun_key_verb_value, w.lemma, derived_word, 'suffix') + + elif dp[1] == 'A.' and wc == 'B': + self.add_word_to_dictionary(self.dict_adv_key_adj_value, self.dict_adj_key_adv_value, w.lemma, derived_word, 'suffix') + + elif dp[1] == 'N.' and wc == 'A': + self.add_word_to_dictionary(self.dict_adj_key_noun_value, self.dict_noun_key_adj_value, w.lemma, derived_word, 'suffix') + + elif dp[1] == 'V.' and wc == 'A': + self.add_word_to_dictionary(self.dict_adj_key_verb_value, self.dict_verb_key_adj_value, w.lemma, derived_word, 'suffix') + + if (w.sub_words[0] == d.sub_word) and (w.sub_words[0] in [ 'in', 'un', 'non', 'de', 'dis', 'a', 'anti', 'im', 'il','ir','mis']): + dp = d.part.split('|') + if len(dp) > 1: + derived_word = ''.join(w.sub_words[1:]) + if dp[1] == '.A' and wc == 'A': + self.add_word_to_dictionary(self.dict_adj_key_adj_value, self.dict_adj_key_adj_value, w.lemma, derived_word, 'neg_prefix') + + if (w.sub_words[0] == d.sub_word) and (w.sub_words[0] in ['in', 'un', 'non', 'de', 'dis','anti', 'im', 'il', 'ir', 'mis']): + dp = d.part.split('|') + if len(dp) > 1: + derived_word = ''.join(w.sub_words[1:]) + if dp[1] == '.V' and wc == 'V': + self.add_word_to_dictionary(self.dict_verb_key_verb_value, self.dict_verb_key_verb_value, w.lemma, derived_word,'neg_prefix') + + if (w.sub_words[0] == d.sub_word) and (w.sub_words[0] in ['in', 'un', 'non', 'de', 'dis','anti', 'im', 'il', 'ir', 'mis']): + dp = d.part.split('|') + if len(dp) > 1: + derived_word = ''.join(w.sub_words[1:]) + if dp[1] == '.N' and wc == 'N': + self.add_word_to_dictionary(self.dict_noun_key_noun_value, self.dict_noun_key_noun_value, w.lemma, derived_word,'neg_prefix') + + + + + + def add_word_to_dictionary(self, dictionary_lemma_key, dictionary_derived_word_key, lemma, derived_word, type): + wtdl = WtypeOfD(lemma,type) + wtdd = WtypeOfD(derived_word,type) + try: + if derived_word in dictionary_lemma_key[lemma]: + dictionary_lemma_key[lemma].append(wtdd) + except: + dictionary_lemma_key[lemma] = [] + dictionary_lemma_key[lemma].append(wtdd) + try: + if lemma not in dictionary_derived_word_key[derived_word]: + dictionary_derived_word_key[derived_word].append(wtdl) + except: + dictionary_derived_word_key[derived_word] = [] + dictionary_derived_word_key[derived_word].append(wtdl) + + + def get_verb_from_noun(self,noun,type): + verb = [] + try: + for v in self.dict_noun_key_verb_value[noun]: + if v.type_of_deriv in type: + verb.append(v) + if 'neg_prefix' in type: + try: + neg_verbs = self.dict_verb_key_verb_value(v.lemma) + except: + neg_verbs = [] + verb.extend(neg_verbs) + + except: + verb = [] + return verb + + + def get_noun_from_verb(self,verb,type): + neg_nouns = [] + noun = [] + types=[] + lemmas=[] + try: + for n in self.dict_verb_key_noun_value[verb]: + if n.type_of_deriv in type: + noun.append(n) + types.append(n.type_of_deriv) + if 'neg_prefix' in type: + try: + neg_verbs = self.dict_verb_key_verb_value[verb] + for neg_verb in neg_verbs: + if neg_verb.type_of_deriv == 'neg_prefix': + try: + neg_nouns_part=self.dict_verb_key_noun_value[neg_verb.lemma] + for neg_noun in neg_nouns_part: + if neg_noun.type_of_deriv in types: + if neg_noun.lemma not in lemmas: + neg_noun_new= WtypeOfD(neg_noun.lemma,'neg_prefix') + neg_nouns.append(neg_noun_new) + lemmas.append(neg_noun.lemma) + + except: + neg_nouns=[] + except: + neg_nouns = [] + noun.extend(neg_nouns) + + except: + noun = [] + return noun + + + def get_adv_from_adj(self, adj,type): + adv = [] + lemmas = [] + try: + for av in self.dict_adj_key_adv_value[adj]: + if av.type_of_deriv in type: + adv.append(av) + if 'neg_prefix' in type: + try: + neg_adjs=self.dict_adj_key_adj_value[adj] + for neg_adj in neg_adjs: + try: + neg_advs=self.dict_adj_key_adv_value[neg_adj.lemma] + except: + neg_advs=[] + for neg_adv in neg_advs: + if neg_adv.lemma not in lemmas: + neg_adv_new= WtypeOfD(neg_adv.lemma,'neg_prefix') + adv.append(neg_adv_new) + lemmas.append(neg_adv.lemma) + + except: + neg_adjs=[] + except: + adv = [] + return adv + + + def get_adj_from_adv(self, adv, type): + adj = [] + try: + for aj in self.dict_adv_key_adj_value[adv]: + if aj.type_of_deriv in type: + adj.append(aj) + if 'neg_prefix' in type: + try: + neg_adjs = self.dict_adj_key_adj_value[aj.lemma] + except: + neg_adjs = [] + adj.extend(neg_adjs) + except: + adj = [] + return adj + + + def get_adj_from_verb(self, verb,type): + adj = [] + lemmas = [] + try: + try: + adjs=self.dict_verb_key_adj_value[verb] + except: + adjs=[] + for aj in adjs: + if aj.lemma not in lemmas: + lemmas.append(aj.lemma) + try: + nouns = self.dict_verb_key_noun_value[verb] + except: + nouns = [] + + for noun in nouns: + try: + adj_from_noun = self.dict_noun_key_adj_value[noun.lemma] + except: + adj_from_noun = [] + + for afn in adj_from_noun: + if afn.lemma not in lemmas: + lemmas.append(afn.lemma) + adjs.append(afn) + + for aj in adjs: + if aj.type_of_deriv in type: + adj.append(aj) + if 'neg_prefix' in type: + try: + neg_adjs = self.dict_adj_key_adj_value[aj.lemma] + except: + neg_adjs = [] + for n_a in neg_adjs: + if n_a.lemma not in lemmas: + lemmas.append(n_a.lemma) + adjs.append(n_a) + except: + adj = [] + return adj + + + def get_verb_from_adj(self, adj,type): + verb=[] + verbs = [] + lemmas = [] + try: + try: + verbs = self.dict_adj_key_verb_value[adj] + except: + verbs = [] + + for v in verbs: + if v.lemma not in lemmas: + lemmas.append(v.lemma) + try: + nouns = self.dict_adj_key_noun_value[adj] + except: + nouns=[] + + for noun in nouns: + try: + verb_from_noun = self.dict_noun_key_verb_value[noun.lemma] + except: + verb_from_noun = [] + + for vfn in verb_from_noun: + if vfn.lemma not in lemmas: + lemmas.append(vfn.lemma) + verbs.append(vfn) + + for v in verbs: + if ((v.type_of_deriv in type) and (v.lemma not in verb)): + verb.append(v) + if 'neg_prefix' in type: + try: + neg_verbs = self.dict_verb_key_verb_value[v.lemma] + except: + neg_verbs = [] + for n_v in neg_verbs: + if n_v.lemma not in lemmas: + lemmas.append(v.lemma) + verb.append(v) + except: + verb = [] + return verb + + + def get_neg_adj_from_adj(self, adj, type): + adjs = [] + try: + for aj in self.dict_adj_key_adj_value[adj]: + if ((aj.type_of_deriv in type) and (aj not in adjs)): + adjs.append(aj) + except: + adjs=[] + return adjs + + + + + + + + + + diff --git a/udapi/block/zellig_harris/enhancedeps.py b/udapi/block/zellig_harris/enhancedeps.py index 656c1e93..9deecc34 100644 --- a/udapi/block/zellig_harris/enhancedeps.py +++ b/udapi/block/zellig_harris/enhancedeps.py @@ -1,120 +1,242 @@ #!/usr/bin/env python from udapi.core.block import Block +from udapi.block.zellig_harris.morphotests import * -def eparent(node): - """ - Return an effective parent for the given node. +eparent_echildren_deprels = ['nsubj', 'nsubjpass', 'dobj', 'iobj', 'csubj', 'csubjpass', 'goeswith', 'mwe', + 'compound', 'list', 'dislocated', 'parataxis', 'remnant', 'reparandum', 'cc', 'conj'] + - The rule for the effective parent - when the current node A has a deprel 'conj' to its parent B, - return B.parent, otherwise return A.parent. +def eparents(node): + """ + Return a list of effective parents for the given node. :param node: An input node. - :return: An effective parent. - :rtype: udapi.core.node.Node + :return: A list of effective parents. + :rtype: list """ + # Rule (1): When node.deprel == conj, its effective parents are equal to its parent. if node.deprel == 'conj': - return node.parent.parent + return eparents(node.parent) + + # Rule (2): Append the real parent and look for its coordinated nodes. + final_eparents = [node.parent] + node_true_deprel = true_deprel(node) + for candidate_eparent in node.parent.children: + if candidate_eparent.deprel == 'conj': + if node_true_deprel in eparent_echildren_deprels: + if node_true_deprel in [node.deprel for node in candidate_eparent.children]: + continue + + final_eparents.append(candidate_eparent) + + return final_eparents + + +def true_deprel(node): + """ + for conjunct nodes (second+ coordination members): + Function gets deprel of the first coordination member + :param node: + :return: string + """ + if node.deprel != 'conj': + return node.deprel - return node.parent + return true_deprel(node.parent) + + +def schildren(node): + """ + If the input node is a second+ member of a coordination (deprel 'conj'), + the function will return a list of modifiers of its immediate parent + (the first coordination member). + Example: 'John will sing and dance': 'John' is shared child of 'dance'. + Children of the first coordination member that are to the right of it + are ignored. Example: 'John will sing at performances and dance'. At performances + will not be recorded as a shared child of dance. + Left-hand-side children of the first coordination member that are subject, object + or 'complicated' elements, such as list mwe, cc, and conj, are not recorded as shared children + of the conjunct when the conjunct has its own child with the same deprel. + :param node: + :return: list + """ + if node.deprel != 'conj': + return [] + + mynodes_parent = node.parent + sharechild_candidates = mynodes_parent.children + + # Eliminating candidates according to their ordering. + for candidate in sharechild_candidates: + if mynodes_parent.ord < candidate.ord < node.ord: + sharechild_candidates.remove(candidate) # eliminates 'in the morning' from + # John sings *in the morning* and never dances. + + mynodes_children = node.children + mynodes_children_true_deprels = [] + for mynodes_child in mynodes_children: + true_mynodes_child_deprel = true_deprel(mynodes_child) + mynodes_children_true_deprels.append(true_mynodes_child_deprel) #List of true deprels of my node's children + + for candidate in sharechild_candidates: + if true_deprel(candidate) in mynodes_children_true_deprels \ + and true_deprel(candidate) in eparent_echildren_deprels: # we deliberately ignore second+ conjuncts of + # shared children + # i.e. from "John and Mary dance and sing", we will only find "John" as subject of "sing". + sharechild_candidates.remove(candidate) + + return sharechild_candidates def echildren(node): """ - Return a list with node's effective children. + Gets all descendants of a node that are either its children or conjuncts of its children. + Just one level down, no recursivity, as UD do not support nested coordinations anyway. :param node: An input node. - :return: A list with node's effective children. - :rtype: list + :return: A list of node's effective children. + + """ + echildren_list = [] + for real_child in node.children: + if real_child.deprel != 'conj': + echildren_list.append(real_child) + + for child in real_child.children: + if child.deprel == 'conj': + echildren_list.append(child) + + return echildren_list +##vincent +# def echildren(node): +# if node.deprel != 'conj': +# target = node +# if node.deprel == 'conj': +# target = node.parent +# immediate_children_list = target.children +# all_children_list = immediate_children_list +# for imchild in immediate_children_list: +# descendants_list = imchild.children +# if imchild == target: +# continue +# descendants_list = imchild.children +# for descendant in descendants_list: +# if descendant.deprel == 'conj': +# all_children_list.append(descendant) +# if len(all_children_list) == 0: +# raise ValueError('No echildren.') +# return all_children_list +# +# + + + + +def eschildren(node): """ - target_deprels = ['subj', 'subjpass', 'dobj', 'iobj', 'compl'] - node_parent = eparent(node) - echildren_list = [child for child in node.children] + Obtain echildren, schildren and combine them into one list. - for candidate_child in node_parent.children: - # Check if a candidate node C has the target deprel. - if candidate_child.deprel not in target_deprels: - continue + :param node: An input nude. + :return: list - # Check if such deprel is not in the current node children already. - no_such_deprel = True - for current_child in node.children: - if current_child.deprel == candidate_child.deprel: - no_such_deprel = False - break + """ + output_list = echildren(node) + for schild in schildren(node): + if schild in output_list: + raise ValueError('Shared child appears in echildren list: %r', schild) - # If there is no such deprel, we can add a new secondary dependence. - if no_such_deprel: - echildren_list.append(candidate_child) + output_list.append(schild) - return echildren_list + return output_list -def enhance_deps(node, new_dependence): +def en_verb_controller_YN(node): """ - Add a new dependence to the node.deps, but firstly check - if there is no such dependence already. + Tells whether the input verb node controls another verb - :param node: A node to be enhanced. - :param new_dependence: A new dependence to be add into node.deps. + :param node: An input node. + :return: boolean """ - for existing_dependence in node.deps: - if existing_dependence['parent'] == new_dependence['parent'] and \ - existing_dependence['deprel'] == new_dependence['deprel']: - return + if node.upos != 'VERB': + raise ValueError('Is not a verb.') + result = False + verb_echildren_list = echildren(node) + for verb_echild in verb_echildren_list: + if true_deprel(verb_echild) == 'xcomp' and verb_echild.upos == 'VERB': + result = True + break + return result + + +def en_verb_controllee_YN(node): + """ + Tells whether the input verb is a controlled verb - node.deps.append(new_dependence) + :param node: An input node. + :return: boolean + + """ + if node.upos != 'VERB': + raise ValueError('Is not a verb.') + result = False + if true_deprel(node) == 'xcomp': + result = True + return result -class EnhanceDeps(Block): +def en_verb_finite_form_YN(node): """ - Identify new relations between nodes in the dependency tree - (an analogy of effective parents/children from PML). - Add these new relations into secondary dependencies slot. + Says whether whether a verb node has finite form, + taking into account analytical verb forms, + unlike UD tagset. + + :param node: An input node. + :return: boolean """ + if node.upos != 'VERB': + raise ValueError('Is not a verb.') + + if node.feats['VerbForm'] == 'Fin': + return True + + + if node.feats['VerbForm'] not in ['Inf', 'Part', 'Ger']: + raise ValueError('Unexpected VerbForm.') + + if node.deprel == 'xcomp': + return True - def process_node(self, node): - """ - Enhance secondary dependencies by application of the following rules: - 1. when the current node A has a deprel 'conj' to its parent B, - create a new secondary dependence (B.parent, B.deprel) to A - 2. when the current node A has a deprel 'conj' to its parent B, look at B.children C - when C.deprel is in {subj, subjpass, iobj, dobj, compl} and there is no A.children D - such that C.deprel == D.deprel, add a new secondary dependence (A, C.deprel) to C + echildren_list = echildren(node) + for echild in echildren_list: + if echild.upos == 'AUX': + return True - :param node: A node to be process. + return False - """ - # Both rules require node.deprel to be 'conj'. - if node.deprel != 'conj': - return - # Node's parent should not be root. - if node.parent.is_root(): - return +def en_verb_passive_form_YN(node): + """ + Says + + :param node: An input node. + :return: boolean + + """ + if node.upos != 'VERB': + raise ValueError('Is not a verb.') - # Apply rule (1) - enhance_deps(node, {'parent': node.parent.parent, 'deprel': node.parent.deprel}) + if node.feats['Voice'] == 'Pass': + return True + elif node.feats['VerbForm'] == 'Part' and node.feats['Tense'] == 'Past': + return True + else: + return False - # Apply rule (2) - target_deprels = ['subj', 'subjpass', 'dobj', 'iobj', 'compl'] - for candidate_child in node.parent.children: - # Check if a candidate node C has the target deprel. - if candidate_child.deprel not in target_deprels: - continue - # Check if such deprel is not in the current node children already. - no_such_deprel = True - for current_child in node.children: - if current_child.deprel == candidate_child.deprel: - no_such_deprel = False - break - # If there is no such deprel, we can add a new secondary dependence. - if no_such_deprel: - enhance_deps(candidate_child, {'parent': node, 'deprel': candidate_child.deprel}) diff --git a/udapi/block/zellig_harris/ennouns.py b/udapi/block/zellig_harris/ennouns.py index eed9ffde..7def4859 100644 --- a/udapi/block/zellig_harris/ennouns.py +++ b/udapi/block/zellig_harris/ennouns.py @@ -1,12 +1,11 @@ import logging - from udapi.core.block import Block - from udapi.block.zellig_harris.configurations import * from udapi.block.zellig_harris.queries import * class EnNouns(Configurations): + """ A block for extraction context configurations for English nouns. @@ -21,12 +20,26 @@ def process_node(self, node): :param node: A node to be process. """ - # We want to extract contexts only for verbs. - if str(node.upos) not in self.pos: - return + # We want to extract contexts only for nouns. + #if str(node.upos) not in self.pos: + # return if self.verbose: logging.info('') - logging.info('Processing node %s/%s', node.root.sent_id, node) - - self.apply_query('en_verb_mydobj', node) + logging.info('Processing node %s/%s', node.root.address(), node) + + if str(node.upos) == 'VERB': + self.apply_query('en_nouns_001b_der_V1_ADVx__ADJx_N1', node,['neg_prefix','suffix','pos_conversion'], ['neg_prefix','suffix','pos_conversion'],['halucinate']) + if str(node.upos) == 'NOUN': + self.apply_query('en_nouns_002a_der_V1_NX__Nx_N1', node, ['neg_prefix', 'suffix', 'pos_conversion'], None, ['halucinate']) + self.apply_query('en_nouns_003b_der_V1_Nx__ADJ1_Nx', node, ['neg_prefix', 'suffix', 'pos_conversion'], None, ['halucinate']) + self.apply_query('en_nouns_004a_der_V1_prepNX__N1_prepNx', node, ['neg_prefix', 'suffix', 'pos_conversion'], None, ['halucinate']) + self.apply_query('en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', node, ['neg_prefix', 'suffix', 'pos_conversion'], None, ['halucinate']) + + self.apply_query('en_nouns_001b_003b_005_ADJx_N1',node, None, None,None) + self.apply_query('en_nouns_002a_Nx_N1', node, None, None, None) + self.apply_query('en_nouns_004a_N1_prepNx',node, None, None, None) + + if str(node.upos) == 'PROPN': + self.apply_query('en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', node, ['neg_prefix', 'suffix', 'pos_conversion'], None, ['halucinate']) + self.apply_query('en_nouns_001b_003b_005_ADJx_N1', node, None, None, None) \ No newline at end of file diff --git a/udapi/block/zellig_harris/enverbs.py b/udapi/block/zellig_harris/enverbs.py index 18dea104..9852fae2 100644 --- a/udapi/block/zellig_harris/enverbs.py +++ b/udapi/block/zellig_harris/enverbs.py @@ -22,11 +22,34 @@ def process_node(self, node): """ # We want to extract contexts only for verbs. - if str(node.upos) not in self.pos: - return + #if str(node.upos) not in self.pos: + # return if self.verbose: logging.info('') logging.info('Processing node %s/%s', node.root.sent_id, node) + # logging.info('Processing node %s/%s', node.root.sent_id, node) + + + + if str(node.upos) == 'NOUN': + self.apply_query('en_verbs_001a_der_ADJx_N1__V1_ADVx', node,['neg_prefix','suffix','pos_conversion'],['neg_prefix','suffix','pos_conversion'],['halucinate']) + self.apply_query('en_verbs_002b_der_Nx_N1__V1_Nx',node,['neg_prefix','suffix','pos_conversion'],None,['halucinate']) + self.apply_query('en_verbs_004b_der_N1_prepNx__V1_prepNx',node,['neg_prefix','suffix','pos_conversion'],None,['halucinate']) + + self.apply_query('en_verbs_002b_003a_V1_Nx', node, None, None, None) + self.apply_query('en_verbs_004b_V1_prepNx', node, None, None, None) + if str(node.upos) == 'ADJ': + self.apply_query('en_verbs_003a_der_ADJ1_Nx__V1_Nx',node,['neg_prefix','suffix','pos_conversion'],None,['halucinate']) + + if str(node.upos) == 'VERB': + self.apply_query('en_verbs_001a_V1_ADVx', node, None, None, None) + + + + # self.apply_query('en_verb_has_iobj_is_relclActive', node) + # self.apply_query('en_verb_has_iobj_is_relclPassive', node) + # self.apply_query('en_verb_has_dobj_is_relclPassive', node) + # self.apply_query('en_verb_has_dobj_is_relclActive', node) + - self.apply_query('en_verb_mydobj', node) diff --git a/udapi/block/zellig_harris/evaluation.py b/udapi/block/zellig_harris/evaluation.py new file mode 100644 index 00000000..eea6ed48 --- /dev/null +++ b/udapi/block/zellig_harris/evaluation.py @@ -0,0 +1,85 @@ +from udapi.core.node import * + +class Evaluation: + + + def __init__(self): + + self.files = [] + self.files_neg = [] + self.files.append(open("en_nouns_001b_der_V1_ADVx__ADJx_N1.csv", 'a+')) + self.files.append(open("en_nouns_002a_der_V1_NX__Nx_N1.csv", 'a+')) + self.files.append(open("en_nouns_003b_der_V1_Nx__ADJ1_Nx.csv",'a+')) + self.files.append(open("en_nouns_004a_der_V1_prepNX__N1_prepNx.csv",'a+')) + self.files.append(open("en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1.csv",'a+')) + #self.files.append(open('en_verbs_001a_der_ADJx_N1__V1_ADVx.csv','a+')) + #self.files.append(open('en_verbs_002b_der_Nx_N1__V1_Nx.csv','a+')) + #self.files.append(open('en_verbs_004b_der_N1_prepNx__V1_prepNx.csv','a+')) + #self.files.append(open('en_verbs_003a_der_ADJ1_Nx__V1_Nx.csv','a+')) + + self.files_neg.append(open("en_nouns_001b_der_V1_ADVx__ADJx_N1-neg.csv", 'a+')) + self.files_neg.append(open("en_nouns_002a_der_V1_NX__Nx_N1-neg.csv", 'a+')) + self.files_neg.append(open("en_nouns_003b_der_V1_Nx__ADJ1_Nx-neg.csv",'a+')) + self.files_neg.append(open("en_nouns_004a_der_V1_prepNX__N1_prepNx-neg.csv",'a+')) + self.files_neg.append(open("en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1-neg.csv",'a+')) + #self.files_neg.append(open('en_verbs_001a_der_ADJx_N1__V1_ADVx-neg.csv', 'a+')) + #self.files_neg.append(open('en_verbs_002b_der_Nx_N1__V1_Nx-neg.csv', 'a+')) + #self.files_neg.append(open('en_verbs_004b_der_N1_prepNx__V1_prepNx-neg.csv', 'a+')) + #self.files_neg.append(open('en_verbs_003a_der_ADJ1_Nx__V1_Nx-neg.csv', 'a+')) + + self.file_real_triples=open("real_triples.csv",'a+') + + for f in self.files: + f.write("Sentence\t"+"Name of function\t"+"Triple\t"+"1.word-derivation\t"+"2.word-derivation\t"+"1.word-upos-orig\t"+"2.word-upos-orig\t"+"1.word-upos-new\t"+"2.word-upos-new\n") + for f in self.files_neg: + f.write("Sentence\t"+"1.word\t"+"2.word\t"+"Comment\n") + + self.file_real_triples.write("Sentence\t"+"Name of function\t"+"Triple\t"+"1.word-upos\t"+"2.word-upos\n") + + def evaluate_triple(self,node,name_of_function,w1,rel,w2,deriv_1,deriv_2,upos_1_orig,upos_2_orig,upos_1_new,upos_2_new): + for f in self.files: + if f.name.split('.')[0] == name_of_function: + f.write(node.root.get_sentence()+"\t") + f.write(name_of_function+"\t") + f.write(w1.lemma+" ") + f.write(rel + " ") + f.write(w2.lemma+"\t") + f.write(deriv_1 + "\t") + f.write(deriv_2 + "\t") + f.write(upos_1_orig + "\t") + f.write(upos_2_orig + "\t") + f.write(upos_1_new + "\t") + f.write(upos_2_new + "\n") + + def evaluate_neg(self,node,name_of_function,word,comment): + for f in self.files_neg: + if f.name.split('-')[0] == name_of_function: + f.write(node.root.get_sentence() + "\t") + f.write(node.lemma + "\t") + try: + f.write(word.lemma + "\t") + except: + f.write("None" + "\t") + + f.write(comment + "\n") + + def evaluate_real(self,node,name_of_function,w1,rel,w2): + self.file_real_triples.write(node.root.get_sentence()+"\t") + self.file_real_triples.write(name_of_function + "\t") + self.file_real_triples.write(w1.lemma + " ") + self.file_real_triples.write(rel + " ") + self.file_real_triples.write(w2.lemma + "\t") + self.file_real_triples.write(w1.upos + "\t") + self.file_real_triples.write(w2.upos + "\n") + + + + + + + + + + + + diff --git a/udapi/block/zellig_harris/morphotests.py b/udapi/block/zellig_harris/morphotests.py new file mode 100644 index 00000000..a42b53a9 --- /dev/null +++ b/udapi/block/zellig_harris/morphotests.py @@ -0,0 +1,51 @@ +import logging + +from udapi.block.zellig_harris.enhancedeps import * + + +def en_verb_finite_form_YN(node): + ''' + Says whether whether a verb node has finite form, + taking into account analytical verb forms, + unlike UD tagset. + :param node: + :return: boolean + ''' + + if node.upos != 'VERB': + raise ValueError('Is not a verb.') + + if node.feats['VerbForm'] == 'Fin': + return True + + if node.feats['VerbForm'] not in ['Inf', 'Part', 'Ger']: + raise ValueError('Unexpected VerbForm.') + + if node.deprel == 'xcomp': + return True + + echildren_list = echildren(node) + for echild in echildren_list: + if echild.upos == 'AUX': + return True + + return False + + +def en_verb_passive_form_YN(node): + ''' + Says + :param node: + :return: boolean +''' + + if node.upos != 'VERB': + raise ValueError('Is not a verb.') + + if node.feats['Voice'] == 'Pass': + return True + + if node.feats['VerbForm'] == 'Part' and node.feats['Tense'] == 'Past': + return True + + return False diff --git a/udapi/block/zellig_harris/queries.py b/udapi/block/zellig_harris/queries.py index 09d4f469..e04a7041 100644 --- a/udapi/block/zellig_harris/queries.py +++ b/udapi/block/zellig_harris/queries.py @@ -1,6 +1,16 @@ +import logging + from udapi.block.zellig_harris.enhancedeps import * +from udapi.block.zellig_harris.morphotests import * +from udapi.core.node import * +from udapi.block.zellig_harris.derinet import * +from udapi.block.zellig_harris.evaluation import * +der = Derinet() +eval = Evaluation() +# this is a mock function +# - it technically works, but linguistically it's nonsense def en_verb_mydobj(node): """ Extract the 'myobj' relation. @@ -9,10 +19,10 @@ def en_verb_mydobj(node): if node.upos != 'VERB': raise ValueError('Is not a verb.') - if node.feats.get('Tense', '') != 'Past': + if node.feats['Tense'] != 'Past': raise ValueError('Is not in the past tense.') - if node.feats.get('VerbForm', '') != 'Part': + if node.feats['VerbForm'] != 'Part': raise ValueError('Is not in a participle form.') triples = [] @@ -24,3 +34,693 @@ def en_verb_mydobj(node): triples.append((node, 'dobj', child_node)) return triples + + +def en_noun_is_dobj_of(node): + """ + + :param node: + :return: + """ + my_revert = en_verb_mydobj(node) # pole trojic + my_reverted = [] + for triple in my_revert: + my_reverted.append((triple[2], triple[1], triple[0])) + return my_reverted + + +# Silvie does not alter the above functions until understood how they work. +# Creating new ones instead if these work improperly. + +####Silvie's functions +# def en_noun_is_subj_relcl(node): +# ''' +# Extract the 'nsubj' relation from a relative clause. +# Example: the man who called me yesterday (-> 'man' is subject of 'call') +# :param node: +# :return: n-tuple containing triples +# ''' +# +# if node.upos not in ['PROPN', 'NOUN']: +# raise ValueError('Is not a noun.') +# +# relcl_verbs_list = [] +# mynode_echildren_list = echildren(node) +# logging.info('Echildren for node %s: %r', node, [node.form for node in mynode_echildren_list]) +# +# for mynode_echild in mynode_echildren_list: +# if true_deprel(mynode_echild) == 'acl:relcl': +# relcl_verbs_list.append(mynode_echild) +# +# if len(relcl_verbs_list) == 0: +# raise ValueError('Not subject of any relative clause') +# +# triples = [] +# for relcl_verb in relcl_verbs_list: +# # if en_verb_passive_form_YN(relcl_verb): +# # logging.info('Passive form for candidate verb %s', relcl_verb) +# # continue +# +# wrong_subjects_list = echildren(relcl_verb) +# logging.info('Candidate: %s, %r', relcl_verb, [node.form for node in wrong_subjects_list]) +# for wrong_subject in wrong_subjects_list: +# if true_deprel(wrong_subject) not in ['nsubj', 'nsubjpass', 'csubj','csubjpass'] and wrong_subject.lemma not in ['where', 'how', 'why', 'when']: +# wrong_subjects_list.remove(wrong_subject) +# +# for wrong_subject in wrong_subjects_list: +# if true_deprel(wrong_subject) in ['nsubjpass', 'csubj', 'csubjpass']: +# raise ValueError('Verb has its own regular subject - passive or clausal') +# +# if wrong_subject.lemma in ['where', 'how', 'why', 'when']: +# raise ValueError('Noun is an adverbial, not subject.') +# +# if wrong_subject.deprel == 'nsubj' and wrong_subject.feats['PronType'] != 'Rel': +# raise ValueError('Verb has its own subject.01') +# +# if len(wrong_subjects_list) == 0: # NB when recycling this script for extraction of other arguments from relclauses. +# # For object relclauses the list will have to be empty of potential objects!!! +# raise ValueError('Subject-relative clause must contain a relative pronoun subject!') +# +# triples.append((node, 'nsubj', relcl_verb)) +# with open('C:\log_SILVIE.txt', 'a') as file: +# file.write('haha') +# +# return triples +# + +"""Iveta - en_verb_has_subject_is_relcl""" + + +def en_verb_has_subject_is_relcl(node): + if node.upos != 'VERB': + raise ValueError('It is not a verb.') + if true_deprel(node) != 'acl:relcl': + raise ValueError('It is not a relative clause.') + ekids_controlleesAct_list = [] + ekids_controlleesPass_list = [] + ekids_controlleesPass_dobj_list = [] + ekids_controlleesPass_iobj_list = [] + ekids_01_list = echildren(node) + if en_verb_controller_YN(node): + for ekid_01 in ekids_01_list: + # print(true_deprel(ekid_01), ekid_01.lemma) + if true_deprel(ekid_01) == 'xcomp' and ekid_01.upos == 'VERB': + if not (en_verb_passive_form_YN(ekid_01)): + ekids_controlleesAct_list.append(ekid_01) + else: + ekids_controlleesPass_list.append(ekid_01) + + for ekid in ekids_controlleesPass_list: + dobj_bool = False + eskids = eschildren(ekid) + for eskid in eskids: + if eskid.deprel in ('dobj', 'ccomp') or (eskid.deprel == 'xcomp' and not (eskid.lemma in ('call', 'consider'))): + dobj_bool = True + ekids_controlleesPass_iobj_list.append(ekid) + continue + if not(dobj_bool): + ekids_controlleesPass_dobj_list.append(ekid) + + + + ekids_list = ekids_01_list + relsubjs_list = [] + for ekid in ekids_list: + if true_deprel(ekid) == 'nsubj' and ekid.feats['PronType'] == 'Rel': + relsubjs_list.append(ekid) + if len(relsubjs_list) == 0: + raise ValueError('Relative clause, but not subject relclause.') + epar_list = eparents(node) + triples = [] + for epar in epar_list: + if epar.upos in ('NOUN', 'PROPN'): + triples.append((node, 'nsubj', epar)) + for ekid_controllee in ekids_controlleesAct_list: + triples.append((ekid_controllee, 'nsubj', epar)) + for ekid_controllee in ekids_controlleesPass_dobj_list: + triples.append((ekid_controllee, 'dobj', epar)) + for ekid_controllee in ekids_controlleesPass_iobj_list: + triples.append((ekid_controllee, 'iobj', epar)) + return triples + + + + +def en_verb_has_iobj_is_relclActive(node): #does not check controlled werbs + if node.upos != 'VERB': + raise ValueError('It is not a verb.') + if true_deprel(node) != 'acl:relcl': + raise ValueError('It is not a relative clause.') + # ekids_controllees_list = [] + # if en_verb_controller_YN(node): + # ekids_01_list = echildren(node) + + # for ekid_01 in ekids_01_list: + # if true_deprel(ekid_01) == 'xcomp' and not (en_verb_passive_form_YN(ekid_01)): + # ekids_controllees_list.append(ekid_01) + + ekids_list = echildren(node) + reliobjs_list = [] + for ekid in ekids_list: + if true_deprel(ekid) == 'iobj' and ekid.feats['PronType'] == 'Rel': + reliobjs_list.append(ekid) + if len(reliobjs_list) == 0: + raise ValueError('Relative clause, but not iobject relclause.') + epar_list = eparents(node) + triples =[] + for epar in epar_list: + if epar.upos in ('NOUN', 'PROPN'): + triples.append((node, 'iobj', epar)) + # for ekid_controllee in ekids_controllees_list: + # triples.append((ekid_controllee, 'nsubj', epar)) + return triples + + +def en_verb_has_iobj_is_relclPassive(node): # does not check controlled werbs + if node.upos != 'VERB': + raise ValueError('It is not a verb.') + if true_deprel(node) != 'acl:relcl': + raise ValueError('It is not a relative clause.') + # ekids_controllees_list = [] + # if en_verb_controller_YN(node): + # ekids_01_list = echildren(node) + + # for ekid_01 in ekids_01_list: + # if true_deprel(ekid_01) == 'xcomp' and not (en_verb_passive_form_YN(ekid_01)): + # ekids_controllees_list.append(ekid_01) + + ekids_list = echildren(node) + reliobjs_list = [] + dobjs_list=[] + for ekid in ekids_list: + if true_deprel(ekid) == 'nsubjpass' and ekid.feats['PronType'] == 'Rel': + reliobjs_list.append(ekid) + if true_deprel(ekid) in ['dobj','ccomp','xcomp']: + dobjs_list.append(ekid) + if len(reliobjs_list) == 0: + raise ValueError('Relative clause, but not obj relclause.') + if len(dobjs_list) ==0: + raise ValueError('Is not indirect object.') + epar_list = eparents(node) + triples = [] + for epar in epar_list: + if epar.upos in ('NOUN', 'PROPN'): + triples.append((node, 'iobj', epar)) + # for ekid_controllee in ekids_controllees_list: + # triples.append((ekid_controllee, 'nsubj', epar)) + return triples + + + + +def en_verb_has_dobj_is_relclPassive(node): # does not check controlled werbs + if node.upos != 'VERB': + raise ValueError('It is not a verb.') + if true_deprel(node) != 'acl:relcl': + raise ValueError('It is not a relative clause.') + # ekids_controllees_list = [] + # if en_verb_controller_YN(node): + # ekids_01_list = echildren(node) + + # for ekid_01 in ekids_01_list: + # if true_deprel(ekid_01) == 'xcomp' and not (en_verb_passive_form_YN(ekid_01)): + # ekids_controllees_list.append(ekid_01) + + ekids_list = echildren(node) + reldobjs_list = [] + dobjs_list=[] + for ekid in ekids_list: + if true_deprel(ekid) == 'nsubjpass' and ekid.feats['PronType'] == 'Rel': + reldobjs_list.append(ekid) + if true_deprel(ekid) in ['dobj','ccomp'] or (true_deprel(ekid) == 'xcomp' and not (ekid.lemma in ('call', 'consider'))): + # todo: funkce - seznam sloves, ktera maji xcomp jako doplnek adj nebo noun + dobjs_list.append(ekid) + if len(reldobjs_list) == 0: + raise ValueError('Relative clause, but not obj, probably subject relclause.') + if len(dobjs_list) !=0: + raise ValueError('Is not direct object.') + epar_list = eparents(node) + triples = [] + for epar in epar_list: + if epar.upos in ('NOUN', 'PROPN'): + triples.append((node, 'dobj', epar)) + # for ekid_controllee in ekids_controllees_list: + # triples.append((ekid_controllee, 'nsubj', epar)) + return triples + + + + + +def en_verb_has_dobj_is_relclActive(node): # does not check controlled werbs + if node.upos != 'VERB': + raise ValueError('It is not a verb.') + if true_deprel(node) != 'acl:relcl': + raise ValueError('It is not a relative clause.') + active=False + ekids_list = echildren(node) + for ekid in ekids_list: + if true_deprel(ekid) == 'nsubj': + active=True + if true_deprel(ekid) == 'nsubj' and ekid.feats['PronType'] == 'Rel': + raise ValueError('It is a subject clause.') + if (true_deprel(ekid) == 'dobj' and not(ekid.feats['PronType'] == 'Rel')): + raise ValueError('Is not direct object.') + if not active: + raise ValueError('Verb is not active') + epar_list = eparents(node) + triples = [] + if not (en_verb_controller_YN(node)): + for epar in epar_list: + if epar.upos in ('NOUN', 'PROPN'): + triples.append((node, 'dobj', epar)) + + else: + descends_list = node.descendants + real_controllees = [] + for descend in descends_list: + controlee_bool=False + if descend.deprel == 'xcomp' and descend.upos == 'VERB': + desc_kids = echildren(descend) + for desc_kid in desc_kids: + if desc_kid.deprel == 'xcomp' and desc_kid.upos=='VERB': + controllee_bool=True + if not(controlee_bool): + real_controllees.append(descend) + + for epar in epar_list: + if epar.upos in ('NOUN', 'PROPN'): + for real_controllee in real_controllees: + triples.append((real_controllee, 'dobj', epar)) + return triples + +"""beginning of derivation functions""" + +def en_verbs_001a_der_ADJx_N1__V1_ADVx(node,dictionary_param_1,dictionary_param_2,der_halucinate_param): + triples = [] + verbs = der.get_verb_from_noun(node.lemma,dictionary_param_1) + if len(verbs) == 0: + eval.evaluate_neg(node, 'en_verbs_001a_der_ADJx_N1__V1_ADVx',None, "Does not exist verb derived from noun.") + noun_childs = eschildren(node) + noun_parents = eparents(node) + try: + adjs1 = noun_childs + except: + adjs1 = [] + try: + adjs = adjs1 + noun_parents + except: + adjs = adjs1 + + if len(adjs) == 0: + eval.evaluate_neg(node, 'en_verbs_001a_der_ADJx_N1__V1_ADVx',None, "Noun does not have parents nor childs.") + + for adj in adjs: + if ((adj in noun_parents) and (adj.upos == 'ADJ') and (node.deprel in ['nsubj', 'acl:relcl'])) or ((adj in noun_childs) and (adj.upos == 'ADJ')): + # if (child.upos == 'ADJ') and (child.deprel == 'amod'): + advs = der.get_adv_from_adj(adj.lemma,dictionary_param_2) + if len(advs) == 0: + eval.evaluate_neg(node, 'en_verbs_001a_der_ADJx_N1__V1_ADVx', adj, "Does not exist adv derived from adj.") + for adv in advs: + new_child = Node() + new_child.lemma = adv.lemma + new_child.upos='ADV' + for verb in verbs: + v = Node() + v.lemma = verb.lemma + v.upos= 'VERB' + if ('der' in der_halucinate_param): + triples.append((v, 'en_verbs_001a_der_ADJx_N1__V1_ADVx', new_child)) + eval.evaluate_triple(node,'en_verbs_001a_der_ADJx_N1__V1_ADVx',v, + 'en_verbs_001a_der_ADJx_N1__V1_ADVx', new_child, + verb.type_of_deriv, adv.type_of_deriv, node.upos, + adj.upos, v.upos, new_child.upos) + if ('halucinate' in der_halucinate_param): + triples.append((v, 'advmod', new_child)) + eval.evaluate_triple(node, 'en_verbs_001a_der_ADJx_N1__V1_ADVx', v, + 'advmod', new_child, + verb.type_of_deriv, adv.type_of_deriv, node.upos, adj.upos, + v.upos, new_child.upos) + else: + eval.evaluate_neg(node,'en_verbs_001a_der_ADJx_N1__V1_ADVx',adj,"Parent or child is not adj or wrong deprel.") + return triples + +def en_nouns_001b_der_V1_ADVx__ADJx_N1(node, dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + + verb_childs = eschildren(node) + if len(verb_childs) == 0: + eval.evaluate_neg(node, 'en_nouns_001b_der_V1_ADVx__ADJx_N1',None,"Verb does not have childs.") + + nouns = der.get_noun_from_verb(node.lemma, dictionary_param1) + if len(nouns) == 0: + eval.evaluate_neg(node, 'en_nouns_001b_der_V1_ADVx__ADJx_N1',None,"Does not exist noun derived from verb.") + + for child in verb_childs: + if (child.upos == 'ADV') and (child.deprel == 'advmod'): + adjs = der.get_adj_from_adv(child.lemma, dictionary_param2) + if len(adjs) == 0: + eval.evaluate_neg(node, 'en_nouns_001b_der_V1_ADVx__ADJx_N1',child, "Does not exist adj derived from adv.") + for adj in adjs: + new_child=Node() + new_child.lemma=adj.lemma + new_child.upos='ADJ' + for noun in nouns: + n = Node() + n.lemma = noun.lemma + n.upos='NOUN' + if ('der') in der_halucinate_param: + triples.append((n, 'en_nouns_001b_der_V1_ADVx__ADJx_N1', new_child)) + eval.evaluate_triple(node, 'en_nouns_001b_der_V1_ADVx__ADJx_N1', n, + 'en_nouns_001b_der_V1_ADVx__ADJx_N1', new_child, + noun.type_of_deriv, adj.type_of_deriv, node.upos, child.upos, + n.upos, new_child.upos) + if ('halucinate') in der_halucinate_param: + triples.append((n, 'amod', new_child)) + eval.evaluate_triple(node, 'en_nouns_001b_der_V1_ADVx__ADJx_N1', n, + 'amod', new_child, + noun.type_of_deriv, adj.type_of_deriv, node.upos, child.upos, + n.upos, new_child.upos) + else: + eval.evaluate_neg(node, 'en_nouns_001b_der_V1_ADVx__ADJx_N1',child, "Child is not adverbium or its deprel is other than advmod." ) + return triples + +def en_nouns_002a_der_V1_NX__Nx_N1(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + if len(parents) == 0: + eval.evaluate_neg(node, 'en_nouns_002a_der_V1_NX__Nx_N1',None, "Noun does not have parents.") + for parent in parents: + if parent.upos == 'VERB' and node.deprel == 'dobj': + nouns = der.get_noun_from_verb(parent.lemma, dictionary_param1) + if len(nouns) == 0: + eval.evaluate_neg(node, 'en_nouns_002a_der_V1_NX__Nx_N1', parent, "Does not exist noun derived from verb.") + for noun in nouns: + n = Node() + n.lemma = noun.lemma + n.upos='NOUN' + if 'der' in der_halucinate_param: + triples.append((n, 'en_nouns_002a_der_V1_NX__Nx_N1', node)) + eval.evaluate_triple(node, 'en_nouns_002a_der_V1_NX__Nx_N1', n, + 'en_nouns_002a_der_V1_NX__Nx_N1', node, + noun.type_of_deriv, 'none', parent.upos, node.upos, + n.upos, node.upos) + if 'halucinate' in der_halucinate_param: + triples.append((n, 'compound' , node)) + eval.evaluate_triple(node, 'en_nouns_002a_der_V1_NX__Nx_N1', n, + 'compound', node, + noun.type_of_deriv, 'none', parent.upos, node.upos, + n.upos, node.upos) + else: + eval.evaluate_neg(node, 'en_nouns_002a_der_V1_NX__Nx_N1',parent, "Noun parent is not verb or noun deprel is other than dobj.") + return triples + + +def en_verbs_002b_der_Nx_N1__V1_Nx(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + if len(parents) == 0: + eval.evaluate_neg(node, 'en_verbs_002b_der_Nx_N1__V1_Nx', None,"Noun does not have parent.") + for parent in parents: + if parent.upos == 'NOUN' and node.deprel == 'compound': + verbs = der.get_verb_from_noun(parent.lemma,dictionary_param1) + if len(verbs) == 0: + eval.evaluate_neg(node, 'en_verbs_002b_der_Nx_N1__V1_Nx',parent,"Does not exist verb derived from noun.") + for verb in verbs: + v = Node() + v.lemma = verb.lemma + v.upos='VERB' + if 'der' in der_halucinate_param: + triples.append((v, 'en_verbs_002b_der_Nx_N1__V1_Nx', node)) + eval.evaluate_triple(node, 'en_verbs_002b_der_Nx_N1__V1_Nx', v, + 'en_verbs_002b_der_Nx_N1__V1_Nx', node, + verb.type_of_deriv, 'none', node.upos, parent.upos, + v.upos, node.upos) + if 'halucinate' in der_halucinate_param: + triples.append((v, 'dep', node)) + eval.evaluate_triple(node, 'en_verbs_002b_der_Nx_N1__V1_Nx', v, + 'dep', node, + verb.type_of_deriv, 'none', node.upos, parent.upos, + v.upos, node.upos) + else: + eval.evaluate_neg(node, 'en_verbs_002b_der_Nx_N1__V1_Nx',parent,"Noun parent is not noun or its deprel is other than compound.") + return triples + + +def en_verbs_003a_der_ADJ1_Nx__V1_Nx(node, dictionary_param1, dictionary_param2, der_halucinate_param): + triples = [] + nouns= [] + adj_parents = eparents(node) + adj_childs = eschildren(node) + try: + nouns1 = adj_parents + except: + nouns1=[] + try: + nouns=nouns1 + adj_childs + except: + nouns = nouns1 + + if len(nouns) == 0: + eval.evaluate_neg(node, 'en_verbs_003a_der_ADJ1_Nx__V1_Nx',None,"Adj does not have parent nor childs.") + + for noun in nouns: + #if noun.upos == 'NOUN': + if ((noun in adj_childs) and (noun.upos == 'NOUN') and (noun.deprel in ['nsubj', 'acl:relcl'])) or ((noun in adj_parents) and (noun.upos == 'NOUN')): + verbs = der.get_verb_from_adj(node.lemma,dictionary_param1) + if len(verbs) == 0: + eval.evaluate_neg(node, 'en_verbs_003a_der_ADJ1_Nx__V1_Nx', noun, "Does not exist derived verb from adj.") + for verb in verbs: + v = Node() + v.lemma = verb.lemma + v.upos='VERB' + if 'der' in der_halucinate_param: + triples.append((verb, 'en_verbs_003a_der_ADJ1_Nx__V1_Nx', noun)) + eval.evaluate_triple(node, 'en_verbs_003a_der_ADJ1_Nx__V1_Nx', v, + 'en_verbs_003a_der_ADJ1_Nx__V1_Nx', noun, + verb.type_of_deriv, 'none', node.upos, noun.upos, + v.upos, noun.upos) + if 'halucinate' in der_halucinate_param: + triples.append((verb, 'dep', noun)) + eval.evaluate_triple(node, 'en_verbs_003a_der_ADJ1_Nx__V1_Nx', v, + 'dep', noun, + verb.type_of_deriv, 'none', node.upos, noun.upos, + v.upos, noun.upos) + if len(triples) == 0: + eval.evaluate_neg(node, 'en_verbs_003a_der_ADJ1_Nx__V1_Nx',noun,"Parent or child is not noun or it has wrong deprel.") + return triples + + +def en_nouns_003b_der_V1_Nx__ADJ1_Nx(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + if len(parents) == 0: + eval.evaluate_neg(node, 'en_nouns_003b_der_V1_Nx__ADJ1_Nx',None, "Noun does not have parents.") + for parent in parents: + if ((parent.upos == 'VERB') and ((node.deprel == 'nsubj') or (node.deprel == 'dobj'))): + adjs = der.get_adj_from_verb(parent.lemma,dictionary_param1) + if len(adjs) == 0: + eval.evaluate_neg(node, 'en_nouns_003b_der_V1_Nx__ADJ1_Nx',parent, "Adj derived from verb does not exist.") + for adj in adjs: + a = Node() + a.lemma = adj.lemma + a.upos='ADJ' + #if (node.deprel == 'nsubj') or (node.deprel == 'dobj'): + if 'der' in der_halucinate_param: + triples.append((a, 'en_nouns_003b_der_V1_Nx__ADJ1_Nx', node)) + eval.evaluate_triple(node, 'en_nouns_003b_der_V1_Nx__ADJ1_Nx', a, + 'en_nouns_003b_der_V1_Nx__ADJ1_Nx', node, + adj.type_of_deriv, 'none', parent.upos, node.upos, + a.upos, node.upos) + if 'halucinate' in der_halucinate_param: + triples.append((a, 'amod', node)) + eval.evaluate_triple(node, 'en_nouns_003b_der_V1_Nx__ADJ1_Nx', a, + 'amod', node, + adj.type_of_deriv, 'none', parent.upos, node.upos, + a.upos, node.upos) + else: + eval.evaluate_neg(node, 'en_nouns_003b_der_V1_Nx__ADJ1_Nx',parent,"Parent is not verb.") + return triples + +def en_nouns_004a_der_V1_prepNX__N1_prepNx(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + if len(parents) == 0: + eval.evaluate_neg(node, 'en_nouns_004a_der_V1_prepNX__N1_prepNx',None,"Noun does not have parent.") + + for parent in parents: + if parent.upos == 'VERB' and node.deprel == 'nmod': + nouns = der.get_noun_from_verb(parent.lemma,dictionary_param1) + if len(nouns) == 0: + eval.evaluate_neg(node, 'en_nouns_004a_der_V1_prepNX__N1_prepNx',parent,"Noun derived from verb does not exist.") + for noun in nouns: + n = Node() + n.lemma = noun.lemma + n.upos='NOUN' + if 'der' in der_halucinate_param: + triples.append((n, 'en_nouns_004a_der_V1_prepNX__N1_prepNx', node)) + eval.evaluate_triple(node, 'en_nouns_004a_der_V1_prepNX__N1_prepNx', n, + 'en_nouns_004a_der_V1_prepNX__N1_prepNx', node, + noun.type_of_deriv, 'none', parent.upos, node.upos, + n.upos, node.upos) + if 'halucinate' in der_halucinate_param: + triples.append((n, 'nmod' , node)) + eval.evaluate_triple(node, 'en_nouns_004a_der_V1_prepNX__N1_prepNx', n, + 'nmod', node, + noun.type_of_deriv, 'none', parent.upos, node.upos, + n.upos, node.upos) + else: + eval.evaluate_neg(node, 'en_nouns_004a_der_V1_prepNX__N1_prepNx', parent, "Parent is not verb or noun deprel is not nmod.") + return triples + +def en_verbs_004b_der_N1_prepNx__V1_prepNx(node,dictionary_param1,dictionnary_param2,der_halucinate_param): + triples = [] + noun_parents = eparents(node) + if len(noun_parents)==0: + eval.evaluate_neg(node, 'en_verbs_004b_der_N1_prepNx__V1_prepNx',None, "Noun does not have parents.") + for parent in noun_parents: + if parent.upos == 'NOUN' and node.deprel == 'nmod': + verbs = der.get_verb_from_noun(parent.lemma,dictionary_param1) + if len(verbs)== 0: + eval.evaluate_neg(node, 'en_verbs_004b_der_N1_prepNx__V1_prepNx',parent,"Verb derived from noun does not exist.") + for verb in verbs: + v= Node() + v.lemma = verb.lemma + v.upos='VERB' + if 'der' in der_halucinate_param: + triples.append((v, 'en_verbs_004b_der_N1_prepNx__V1_prepNx', node)) + eval.evaluate_triple(node, 'en_verbs_004b_der_N1_prepNx__V1_prepNx', v, + 'en_verbs_004b_der_N1_prepNx__V1_prepNx', node, + verb.type_of_deriv, 'none', parent.upos, node.upos, + v.upos, node.upos) + if 'halucinate' in der_halucinate_param: + triples.append((v, 'nmod', node)) + eval.evaluate_triple(node, 'en_verbs_004b_der_N1_prepNx__V1_prepNx', v, + 'nmod', node, + verb.type_of_deriv, 'none', parent.upos, node.upos, + v.upos, node.upos) + else: + eval.evaluate_neg(node, 'en_verbs_004b_der_N1_prepNx__V1_prepNx', parent, "Noun parent is not noun or it's deprel is not nmod.") + return triples + + +def en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1(node, dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + adjs = [] + noun_childs = eschildren(node) + noun_parents = eparents(node) + + try: + adjs1 = noun_parents + except: + adjs1=[] + try: + adjs=adjs1 + noun_childs + except: + adjs = adjs1 + + if len(adjs) ==0: + eval.evaluate_neg(node, 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', None, "Noun does not have childs nor parents.") + + for adj in adjs: + if ((adj in noun_parents) and (adj.upos == 'ADJ') and (node.deprel in ['nsubj','acl:relcl'])) or ((adj in noun_childs) and (adj.upos == 'ADJ')): + neg_adjs = der.get_neg_adj_from_adj(adj.lemma, dictionary_param1) + if len(neg_adjs)==0: + eval.evaluate_neg(node, 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', adj, "Neg_adj derived from adj does not exist.") + for neg_adj in neg_adjs: + new_neg_adj_node=Node() + new_neg_adj_node.lemma=neg_adj.lemma + new_neg_adj_node.upos='ADJ' + if('der' in der_halucinate_param): + triples.append((node, 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', new_neg_adj_node)) + eval.evaluate_triple(node, 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', node, + 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', new_neg_adj_node, + neg_adj.type_of_deriv, 'none', node.upos , adj.upos, + node.upos, new_neg_adj_node.upos) + if ('halucinate' in der_halucinate_param): + triples.append((node, 'amod', new_neg_adj_node)) + eval.evaluate_triple(node, 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1', node, + 'amod', new_neg_adj_node, + neg_adj.type_of_deriv, 'none', node.upos, adj.upos, + node.upos, new_neg_adj_node.upos) + else: + eval.evaluate_neg(node, 'en_nouns_005_Nx_ADJ1__Nx_neg_ADJ1',adj, "Parent / child is not adj or wrong deprel.") + + return triples + + +"""beginning of real functions""" + +def en_verbs_001a_V1_ADVx(node,dictionary_param_1,dictionary_param_2,der_halucinate_param): + triples = [] + noun_childs = eschildren(node) + + for adv in noun_childs: + if ((adv.deprel == 'advmod') and (adv.upos == 'ADV')): + triples.append((node, 'advmod', adv)) + eval.evaluate_real(node, "en_verbs_001a_V1_ADVx", node, 'advmod', adv) + return triples + +def en_nouns_001b_003b_005_ADJx_N1(node, dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + noun_childs = eschildren(node) + noun_parents = eparents(node) + try: + adjs1 = noun_childs + except: + adjs1 = [] + try: + adjs = adjs1 + noun_parents + except: + adjs = adjs1 + + for adj in adjs: + if ((adj in noun_parents) and (adj.upos == 'ADJ') and (node.deprel in ['nsubj', 'acl:relcl'])) or ((adj in noun_childs) and (adj.upos == 'ADJ') and (adj.deprel == 'amod')): + triples.append((node, 'amod', adj)) + eval.evaluate_real(node,"en_nouns_001b_003b_005_ADJx_N1", node, 'amod', adj) + return triples + +def en_nouns_002a_Nx_N1(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + for parent in parents: + if parent.upos == 'NOUN' and node.deprel == 'compound': + triples.append((parent, 'compound' , node)) + eval.evaluate_real(node, "en_nouns_002a_Nx_N1", node, 'compound', parent) + + return triples + + +def en_verbs_002b_003a_V1_Nx(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + for parent in parents: + if parent.upos == 'VERB' and node.deprel == 'dep': + triples.append((parent, 'dep', node)) + eval.evaluate_real(node, "en_verbs_002b_003a_V1_Nx", parent, 'dep', node) + + return triples + + + + +def en_nouns_004a_N1_prepNx(node,dictionary_param1,dictionary_param2,der_halucinate_param): + triples = [] + parents=eparents(node) + for parent in parents: + if parent.upos == 'NOUN' and node.deprel == 'nmod': + triples.append((parent, 'nmod' , node)) + eval.evaluate_real(node, 'en_nouns_004a_der_V1_prepNX__N1_prepNx', parent, 'nmod', node) + + return triples + +def en_verbs_004b_V1_prepNx(node,dictionary_param1,dictionnary_param2,der_halucinate_param): + triples = [] + noun_parents = eparents(node) + for parent in noun_parents: + if parent.upos == 'VERB' and node.deprel == 'nmod': + triples.append((parent, 'nmod', node)) + eval.evaluate_real(node, 'en_verbs_004b_V1_prepNx', parent, 'nmod', node) + return triples + + diff --git a/udapi/block/zellig_harris/tests/test_enhancedeps.py b/udapi/block/zellig_harris/tests/test_enhancedeps.py new file mode 100644 index 00000000..4e680e26 --- /dev/null +++ b/udapi/block/zellig_harris/tests/test_enhancedeps.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import unittest + +from udapi.core.root import Root +from udapi.core.node import Node + + +class TestEffectives(unittest.TestCase): + def test_eparents(self): + tree = Root() + + +if __name__ == "__main__": + unittest.main() diff --git a/udapi/core/basewriter.py b/udapi/core/basewriter.py index 8f52ec0c..96bfec36 100644 --- a/udapi/core/basewriter.py +++ b/udapi/core/basewriter.py @@ -42,7 +42,7 @@ def before_process_document(self, document): else: logging.warning('docname_as_file=1 but the document contains no docname') else: - sys.stdout = sys.__stdout__ + sys.stdout = sys.__stdout__ return old_filehandle = sys.stdout @@ -56,6 +56,7 @@ def before_process_document(self, document): elif filename == '-': logging.info('Writing to stdout.') sys.stdout = sys.__stdout__ + else: logging.info('Writing to file %s.', filename) sys.stdout = open(filename, 'wt', encoding=self.encoding, newline=self.newline) diff --git a/udapi/core/run.py b/udapi/core/run.py index 037ec722..c42ec433 100644 --- a/udapi/core/run.py +++ b/udapi/core/run.py @@ -99,6 +99,10 @@ def _import_blocks(block_names, block_args): return blocks +def derinet(): + pass + + class Run(object): """Processing unit that processes UD data; typically a sequence of blocks.""" @@ -125,6 +129,8 @@ def execute(self): # Import blocks (classes) and construct block instances. blocks = _import_blocks(block_names, block_args) + + # Initialize blocks (process_start). for block in blocks: block.process_start()