Skip to content

Commit 9539194

Browse files
committed
Initial support of phrase search for rum
1 parent dff84c9 commit 9539194

File tree

6 files changed

+138
-11
lines changed

6 files changed

+138
-11
lines changed

data/rum.data

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,5 @@ adorned with fountains and handsome villas. We shall devote to-morrow to
4848
seeing all there is to be seen, and go to Strassburg to-morrow evening
4949
for two or three days. From there to Constance, and then hold _our_
5050
“Council” as to further movements.
51+
def fgr
52+
def xxx fgr

expected/rum.out

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ ORDER BY a >< to_tsquery('pg_catalog.english', 'ever|wrote');
3030
Index Cond: (a @@ '''ever'' | ''wrote'''::tsquery)
3131
(6 rows)
3232

33+
explain (costs off)
34+
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english',
35+
'def <-> fgr');
36+
QUERY PLAN
37+
-----------------------------------------------------------
38+
Aggregate
39+
-> Index Scan using rumidx on test_rum
40+
Index Cond: (a @@ '''def'' <-> ''fgr'''::tsquery)
41+
(3 rows)
42+
3343
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'ever|wrote');
3444
count
3545
-------
@@ -66,6 +76,20 @@ SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', '(comp
6676
2
6777
(1 row)
6878

79+
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english',
80+
'def <-> fgr');
81+
count
82+
-------
83+
1
84+
(1 row)
85+
86+
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english',
87+
'def <2> fgr');
88+
count
89+
-------
90+
2
91+
(1 row)
92+
6993
SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), *
7094
FROM test_rum
7195
WHERE a @@ to_tsquery('pg_catalog.english', 'way')

rum--1.0.sql

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ RETURNS bool
3939
AS 'MODULE_PATHNAME'
4040
LANGUAGE C IMMUTABLE STRICT;
4141

-- Consistent function for tsvector @@ tsquery matching, implemented in C.
-- It is registered as support FUNCTION 4 further down in this script, in
-- place of gin_tsquery_consistent.
-- NOTE(review): the C implementation also reads argument slots 8 and 9
-- (addInfo, addInfoIsNull), which this eight-argument signature does not
-- declare; presumably the access method supplies them when calling the
-- function directly. Confirm.
42+
CREATE FUNCTION rum_tsquery_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal)
43+
RETURNS bool
44+
AS 'MODULE_PATHNAME'
45+
LANGUAGE C IMMUTABLE STRICT;
46+
4247
CREATE FUNCTION rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal)
4348
RETURNS float8
4449
AS 'MODULE_PATHNAME'
@@ -53,7 +58,7 @@ AS
5358
FUNCTION 1 bttextcmp(text, text),
5459
FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal),
5560
FUNCTION 3 rum_extract_tsquery(tsvector,internal,smallint,internal,internal,internal,internal),
56-
FUNCTION 4 gin_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
61+
FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
5762
FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal),
5863
FUNCTION 6 gin_tsquery_triconsistent(internal,smallint,tsvector,int,internal,internal,internal),
5964
FUNCTION 7 rum_tsvector_config(internal),

rum_ts_utils.c

Lines changed: 98 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ PG_FUNCTION_INFO_V1(rum_extract_tsvector);
2424
PG_FUNCTION_INFO_V1(rum_extract_tsquery);
2525
PG_FUNCTION_INFO_V1(rum_tsvector_config);
2626
PG_FUNCTION_INFO_V1(rum_tsquery_pre_consistent);
27+
PG_FUNCTION_INFO_V1(rum_tsquery_consistent);
2728
PG_FUNCTION_INFO_V1(rum_tsquery_distance);
2829
PG_FUNCTION_INFO_V1(rum_ts_distance);
2930

@@ -43,23 +44,27 @@ static float calc_rank_pos_or(float *w, Datum *addInfo, bool *addInfoIsNull,
4344

4445
static float calc_rank_or(const float *w, TSVector t, TSQuery q);
4546
static float calc_rank_and(const float *w, TSVector t, TSQuery q);
47+
static int count_pos(char *ptr, int len);
48+
static char * decompress_pos(char *ptr, uint16 *pos);
4649

47-
typedef struct
50+
typedef struct
4851
{
4952
QueryItem *first_item;
50-
bool *check;
5153
int *map_item_operand;
54+
bool *check;
5255
bool *need_recheck;
56+
Datum *addInfo;
57+
bool *addInfoIsNull;
5358
} RumChkVal;
5459

5560
static bool
56-
checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data)
61+
pre_checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data)
5762
{
5863
RumChkVal *gcv = (RumChkVal *) checkval;
5964
int j;
6065

6166
/* if any val requiring a weight is used, set recheck flag */
62-
if (val->weight != 0)
67+
if (val->weight != 0 || data != NULL)
6368
*(gcv->need_recheck) = true;
6469

6570
/* convert item's number to corresponding entry's (operand's) number */
@@ -76,8 +81,8 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS)
7681

7782
TSQuery query = PG_GETARG_TSQUERY(2);
7883

79-
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
80-
bool recheck;
84+
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
85+
bool recheck;
8186
bool res = FALSE;
8287

8388
if (query->size > 0)
@@ -97,12 +102,95 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS)
97102
res = TS_execute(GETQUERY(query),
98103
&gcv,
99104
false,
100-
checkcondition_rum);
105+
pre_checkcondition_rum);
101106
}
102107

103108
PG_RETURN_BOOL(res);
104109
}
105110

/*
 * checkcondition_rum
 *		TS_execute callback used by rum_tsquery_consistent.
 *
 * checkval is the RumChkVal prepared by the caller, val is the query operand
 * being tested, and data (may be NULL) receives the operand's positions.
 *
 * Side effect: sets *need_recheck to true when the operand has a non-zero
 * weight.
 *
 * Returns false when the operand's entry is not flagged in check[]; returns
 * true otherwise.  When data is non-NULL and a non-null addInfo datum exists
 * for the entry, data->pos is palloc'd, filled with the decoded positions,
 * and data->allocated is set to true.
 *
 * NOTE(review): if data is non-NULL but no addInfo is available for the
 * entry, the function returns true with data left untouched and without
 * asking for a recheck.  Confirm this is what the phrase executor expects.
 */
111+
static bool
112+
checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data)
113+
{
114+
RumChkVal *gcv = (RumChkVal *) checkval;
115+
int j;
116+
117+
/* an operand with a non-zero weight forces a recheck */
118+
if (val->weight != 0)
119+
*(gcv->need_recheck) = true;
120+
121+
/* convert item's number to corresponding entry's (operand's) number */
122+
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
123+
124+
/* entry not present in the indexed value: the operand does not match */
125+
if (!gcv->check[j])
126+
return false;
127+
128+
/* positions were requested and addInfo for this entry is available */
if (data && gcv->addInfo && gcv->addInfoIsNull[j] == false)
129+
{
130+
bytea *positions = DatumGetByteaP(gcv->addInfo[j]);
131+
int32 i;
132+
char *ptrt;
133+
WordEntryPos post;
134+
135+
/* count the encoded positions so the output array can be sized */
data->npos = count_pos(VARDATA_ANY(positions),
136+
VARSIZE_ANY_EXHDR(positions));
137+
data->pos = palloc(sizeof(*data->pos) * data->npos);
138+
data->allocated = true;
139+
140+
ptrt = (char *)VARDATA_ANY(positions);
141+
/* decompress_pos takes post by pointer and updates it on every call */
post = 0;
142+
143+
for(i=0; i<data->npos; i++)
144+
{
145+
ptrt = decompress_pos(ptrt, &post);
146+
/*
 * NOTE(review): post is a WordEntryPos; elsewhere this file applies
 * WEP_GETWEIGHT to such values, so it presumably carries weight bits.
 * Confirm whether they should be masked off before storing.
 */
data->pos[i] = post;
147+
}
148+
}
149+
150+
return true;
151+
}
152+
/*
 * rum_tsquery_consistent
 *		Consistent function for tsvector @@ tsquery.
 *
 * Arguments read: 0 = check array, 2 = tsquery, 4 = extra_data,
 * 5 = recheck (output), 8 = addInfo array, 9 = addInfoIsNull array.
 * Arguments 1 (strategy) and 3 (nkeys) are not used.
 *
 * Returns the result of TS_execute over the query with checkcondition_rum
 * as the per-operand callback, or false for an empty query.  *recheck is
 * reset to false first and may be raised by the callback.
 *
 * NOTE(review): slots 8 and 9 are beyond the eight arguments listed in the
 * CREATE FUNCTION declaration in rum--1.0.sql; presumably the access method
 * passes them when calling directly. Confirm.
 */
153+
Datum
154+
rum_tsquery_consistent(PG_FUNCTION_ARGS)
155+
{
156+
bool *check = (bool *) PG_GETARG_POINTER(0);
157+
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
158+
TSQuery query = PG_GETARG_TSQUERY(2);
159+
/* int32 nkeys = PG_GETARG_INT32(3); */
160+
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
161+
bool *recheck = (bool *) PG_GETARG_POINTER(5);
162+
Datum *addInfo = (Datum *) PG_GETARG_POINTER(8);
163+
bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9);
164+
bool res = FALSE;
165+
166+
/* Start with no recheck; checkcondition_rum raises the flag
167+
 * when an operand has a non-zero weight */
168+
*recheck = false;
169+
170+
if (query->size > 0)
171+
{
172+
/* item is assigned below but not read afterwards in this function */
QueryItem *item;
173+
RumChkVal gcv;
174+
175+
/*
176+
 * check-parameter array has one entry for each value
177+
 * (operand) in the query.
178+
 */
179+
gcv.first_item = item = GETQUERY(query);
180+
gcv.check = check;
181+
/* extra_data[0] holds the query-item to operand-number map */
gcv.map_item_operand = (int *) (extra_data[0]);
182+
gcv.need_recheck = recheck;
183+
gcv.addInfo = addInfo;
184+
gcv.addInfoIsNull = addInfoIsNull;
185+
186+
/*
 * NOTE(review): the third argument is true here, while
 * rum_tsquery_pre_consistent passes false; verify against the
 * TS_execute declaration that this difference is intended.
 */
res = TS_execute(GETQUERY(query), &gcv, true, checkcondition_rum);
187+
}
188+
189+
PG_RETURN_BOOL(res);
190+
}
191+
192+
193+
106194
static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
107195

108196
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
@@ -409,7 +497,8 @@ calc_rank_pos(float *w, TSQuery q, Datum *addInfo, bool *addInfoIsNull, int size
409497
return 0.0;
410498

411499
/* XXX: What about NOT? */
412-
res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
500+
res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
501+
item->qoperator.oper == OP_PHRASE)) ?
413502
calc_rank_pos_and(w, addInfo, addInfoIsNull, size) :
414503
calc_rank_pos_or(w, addInfo, addInfoIsNull, size);
415504

@@ -801,7 +890,7 @@ rum_extract_tsquery(PG_FUNCTION_ARGS)
801890
j;
802891
bool *partialmatch;
803892
int *map_item_operand;
804-
char *operand = GETOPERAND(query);
893+
char *operand = GETOPERAND(query);
805894
QueryOperand **operands;
806895

807896
/*

rumutil.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ initRumState(RumState *state, Relation index)
210210
state->canPreConsistent[i] = false;
211211
}
212212

213-
/*
213+
/*
214214
* Check opclass capability to do order by.
215215
*/
216216
if (index_getprocid(index, i + 1, RUM_ORDERING_PROC) != InvalidOid)

sql/rum.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,20 @@ SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'ever|
1717
explain (costs off)
1818
SELECT * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'ever|wrote')
1919
ORDER BY a >< to_tsquery('pg_catalog.english', 'ever|wrote');
20+
explain (costs off)
21+
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english',
22+
'def <-> fgr');
2023

2124
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'ever|wrote');
2225
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'have&wish');
2326
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'knew&brain');
2427
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'among');
2528
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'structure&ancient');
2629
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', '(complimentary|sight)&(sending|heart)');
30+
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english',
31+
'def <-> fgr');
32+
SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english',
33+
'def <2> fgr');
2734
SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), *
2835
FROM test_rum
2936
WHERE a @@ to_tsquery('pg_catalog.english', 'way')

0 commit comments

Comments
 (0)