-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhi_db.sql
253 lines (228 loc) · 10.1 KB
/
hi_db.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
PRAGMA encoding="UTF-8";
PRAGMA foreign_keys = ON;
create table ROOT_TYPE(
root_type varchar(1) primary key /*h:head root, n:non head root*/
);
create table PRECEDENCES(
precedence varchar(12) primary key,
declaration varchar(12)
);
create table LANGUAGES(
lid varchar(5) primary key,
language varchar(128),
head_position smallint, /*0: undefined, 1: head first, 2: head last*/
fst varchar(256),
natural_language smallint /*0: false, otherwise true*/
);
create table GCAT_LID(
gcat varchar(12) not null,
lid varchar(5) references LANGUAGES(lid),
PRIMARY KEY(gcat, lid)
);
create table GCAT(/*Eventually, table for terminal symbols, i.e. to which bison tokens can be assigned*/
gcat varchar(12) not null,
feature varchar(12) not null,
lid varchar(5) references LANGUAGES(lid),
token smallint,/*NULL or '0': don't generate token in bison source, non-NULL: generate token in bison source*/
precedence varchar(12) references PRECEDENCES(precedence),
precedence_level smallint check(precedence_level>=0),
PRIMARY KEY(gcat, feature, lid) /*gcat, feature, lid are all keys as once token literals in bison source will be generated (by concatenating gcat, feature and lid), uniqueness will be granted*/
FOREIGN KEY(gcat, lid) REFERENCES SYMBOLS(symbol, lid)
FOREIGN KEY(feature, lid) REFERENCES SYMBOLS(symbol, lid)
);
/*create unique index i_gcat_lid on GCAT(gcat,lid) where feature='stem' collate nocase;*/
CREATE TRIGGER GCAT_TRIGGER
AFTER INSERT ON GCAT
FOR EACH ROW
BEGIN
INSERT OR IGNORE INTO GCAT_LID(gcat,lid) select gcat,lid from gcat where rowid=last_insert_rowid();
END;
create table SYMBOLS(/*Table for all kinds of symbols: terminals (including gcat features) and non-terminals*/
symbol varchar(12),/*Currently only used to reference from gcat and rule_to_rule_map*/
lid varchar(5) references LANGUAGES(lid),
description varchar(128),
PRIMARY KEY(symbol,lid)
);
/*table for set of symbols to support multi-lingual symbol use in other tables as foreign key*/
create table SYMBOL_SET(
symbol varchar(12),
PRIMARY KEY(symbol)
);
CREATE TRIGGER SYMBOLS_TRIGGER
AFTER INSERT ON SYMBOLS
FOR EACH ROW
BEGIN
INSERT OR IGNORE INTO SYMBOL_SET(symbol) select symbol from symbols where rowid=last_insert_rowid();
END;
/*Handles semantic dependencies like e.g. kick the bucket=kick|1|1|the;kick|1|2|bucket; and so on like look|1|1|up;look|2|1|after;*/
/*That is, it is to store dependencies of one semantic unit (szótári egység)*/
create table DEPOLEX(
lexeme varchar(47),
d_key smallint not null check(d_key>0), /*start value: 1*/
d_counter smallint not null check(d_counter>0), /*start value: 1*/
optional_parent_allowed smallint, /*if true, dependency entry is taken into account as well even if the lexeme was not found as dependency one level above; 0:false, otherwise:true; note that NULL is evaulated as 0*/
d_failover smallint, /*NULL or 0 means end of dependency chain; a failover dependency points to a d_counter (> than current d_counter) and is only executed if the current d_counter dependency check failed*/
d_successor smallint, /*NULL or 0 means end of dependency chain; a successor dependency points to a d_counter (> than current d_counter) and is only executed if the current d_counter dependency check was successful*/
manner smallint,/*0:exactly once, 1:at least once; 2:more than once*/
semantic_dependency varchar(47), /*dependencies must be stored explicitly (which means if a word has no dependencies i.e. can stand on its own,
must be stored with full key entry with NULL semantic_dependecy value), otherwise noone can tell if a functor (word) can stand on its own or only together with other words*/
ref_d_key smallint,/*belongs to the field semantic_dependency in this table*/
PRIMARY KEY(lexeme, d_key, d_counter)
FOREIGN KEY(lexeme, d_key) REFERENCES FUNCTORS(functor, d_key) DEFERRABLE INITIALLY DEFERRED
FOREIGN KEY(semantic_dependency, ref_d_key) REFERENCES FUNCTORS(functor, d_key) DEFERRABLE INITIALLY DEFERRED
);
/*Maps each syntactic rule to a semantic rule (note: semantic combination rules are divided into different steps due to
technical reasons)*/
create table RULE_TO_RULE_MAP(
parent_symbol varchar(12),
head_root_symbol varchar(12),
non_head_root_symbol varchar(12),
step smallint not null check(step>0), /*start value: 1*/
failover smallint, /*smallest value: 1; NULL or 0 means end of rule chain. A failover step is only executed if the current step failed (e.g. either symbols or functors are not found)*/
successor smallint, /*smallest value: 1; NULL or 0 means end of rule chain. A successor step is only executed if the current step succeeded*/
main_node_symbol varchar(12),
main_node_lexeme varchar(47),
main_lookup_root varchar(2) references ROOT_TYPE(root_type),
main_ref_parents varchar(1),
main_lookup_subtree_symbol varchar(12),
main_set_op smallint check(main_set_op>0 and main_set_op<6),
dependent_node_symbol varchar(12),
dependent_node_lexeme varchar(47),
dependency_lookup_root varchar(2) references ROOT_TYPE(root_type),
dependency_ref_parents varchar(1),
dependency_lookup_subtree_symbol varchar(12),
dependent_set_op smallint check(dependent_set_op>0 and dependent_set_op<6),
lid varchar(5) references LANGUAGES(lid),
PRIMARY KEY(parent_symbol, head_root_symbol, non_head_root_symbol, step)
FOREIGN KEY(parent_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(head_root_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(non_head_root_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(main_node_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(main_node_lexeme) REFERENCES FUNCTOR_DECL(functor) DEFERRABLE INITIALLY DEFERRED
FOREIGN KEY(main_lookup_subtree_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(dependent_node_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(dependent_node_lexeme) REFERENCES FUNCTOR_DECL(functor) DEFERRABLE INITIALLY DEFERRED
FOREIGN KEY(dependency_lookup_subtree_symbol) REFERENCES SYMBOL_SET(symbol)
);
create table LEXICON(
word varchar(256),
lid varchar(5) references LANGUAGES(lid) DEFERRABLE INITIALLY DEFERRED,
gcat varchar(12),
lexeme varchar(47),
PRIMARY KEY(word, lid, gcat)
FOREIGN KEY(lexeme) REFERENCES FUNCTOR_DECL(functor) DEFERRABLE INITIALLY DEFERRED
FOREIGN KEY(gcat, lid) REFERENCES GCAT_LID(gcat, lid) DEFERRABLE INITIALLY DEFERRED
);
/*create table TAGCONTEXT(*//*Extracted tags, just for fast tag search to find the right fcontext*/
/*model_id text,*//*id for the model in which the context was interpreted*/
/*context_source text,*//*user or other source of context*/
/*session_id text,*/
/*timestamp text,*/
/*tag_counter smallint,*/
/*tag text,*/
/*value text,*/
/*PRIMARY KEY(model_id,context_source,session_id,timestamp,tag_counter)*/
/*);*/
/*TODO:add language*/
create table ANALYSES(
source text,/*user name or any other source of the utterance*/
timestamp int,/*epoch*/
sentence text,
rank float,/*nr of constants/nr of words ratio, the smaller the better*/
a_counter smallint,/*analysis counter*/
analysis text,
PRIMARY KEY(source,timestamp,sentence,rank,a_counter)
);
/*TODO:add language*/
create table FAILED_ANALYSES(
source text,/*user name or any other source of the utterance*/
timestamp int,/*epoch*/
sentence text,
a_counter smallint,
analysis text,
PRIMARY KEY(source,timestamp,sentence,a_counter)
);
create table FUNCTOR_DECL(
functor varchar(47),
PRIMARY KEY(functor)
);
create table FUNCTOR_IDS(
functor_id varchar(51),
PRIMARY KEY(functor_id)
);
create table FUNCTOR_DEFS(
functor_id varchar(51),
tlid varchar(5),
imp_counter smallint not null check(imp_counter>0),/*start value: 1*/
definition text,
PRIMARY KEY(functor_id, tlid)
);
CREATE TRIGGER FUNCTOR_DEFS_TRIGGER
AFTER INSERT ON FUNCTOR_DEFS
FOR EACH ROW
BEGIN
INSERT OR IGNORE INTO FUNCTOR_IDS(functor_id) select functor_id from functor_defs where rowid=last_insert_rowid();
END;
create table FUNCTOR_TAGS(
functor varchar(47),
d_key smallint,
trigger_tag text,/*serves as condition: if such a tag was created during the interpretation it triggers taking into account (during transcription)
the tag-value pairs of the entry e.g. grammatical mood of the verb (imperative, interrogative, indicative),
since different tag-value pairs may belong to an indicative mood and an imperative mood as in case of
"a directory lists files" and "list files"*/
counter smallint not null check(counter>0),/*start value: 1*/
tag text,/*if the trigger_tag is empty, tag-value pairs are added unconditionally*/
value text,
PRIMARY KEY(functor, d_key, trigger_tag, counter)
FOREIGN KEY(functor, d_key) REFERENCES FUNCTORS(functor, d_key) DEFERRABLE INITIALLY DEFERRED
);
create table FUNCTORS(
functor varchar(47),
d_key smallint not null check(d_key>0),/*start value: 1*/
functor_id varchar(51),
PRIMARY KEY(functor, d_key)
FOREIGN KEY(functor_id) REFERENCES FUNCTOR_IDS(functor_id) DEFERRABLE INITIALLY DEFERRED
);
CREATE TRIGGER FUNCTORS_TRIGGER
AFTER INSERT ON FUNCTORS
FOR EACH ROW
BEGIN
INSERT OR IGNORE INTO FUNCTOR_DECL(functor) select functor from functors where rowid=last_insert_rowid();
END;
create table GRAMMAR(
lid varchar(5) references LANGUAGES(lid) DEFERRABLE INITIALLY DEFERRED,
parent_symbol varchar(12) not null check(length(parent_symbol)>0),
head_symbol varchar(12) not null check(length(head_symbol)>0),
non_head_symbol varchar(12),
precedence varchar(12),
action text,/*if content is in quotes then it is regarded as code, if not then it is regarded as filename*/
PRIMARY KEY(lid, parent_symbol, head_symbol, non_head_symbol)
FOREIGN KEY(parent_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(head_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(non_head_symbol) REFERENCES SYMBOL_SET(symbol)
FOREIGN KEY(precedence) REFERENCES SYMBOL_SET(symbol) /*Reference to GCAT is too strict as it's not a must for a precedence symbol to match a token*/
);
create table SETTINGS(
key text primary key,
value text
);
create table ANALYSES_DEPS(
source text,
timestamp int,
sentence text,
rank float,
a_counter smallint,
mood text,
function text,
counter smallint,
level smallint,
word text,
lexeme text,
d_key smallint,
d_counter smallint,
dependency text,
ref_d_key smallint,
tags text,
c_value text,
PRIMARY KEY(source,timestamp,sentence,rank,a_counter,mood,function)/*key contains that of the analyses table to be able to make match -mood and function don't play a role in matching due to having a timestamp*/
);