Skip to content

Commit 21b748e

Browse files
committed
1 Fix loss of precision in rank with OR-ed lexemes
2 Allow tsquery_in to accept an empty (void) tsquery: resolves the dump/restore problem with tsquery
1 parent fbff2e9 commit 21b748e

File tree

3 files changed

+49
-23
lines changed

3 files changed

+49
-23
lines changed

contrib/tsearch2/expected/tsearch2.out

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -746,21 +746,21 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
746746
(1 row)
747747

748748
select rank(' a:1 s:2C d g'::tsvector, 'a | s');
749-
rank
750-
------
751-
0.28
749+
rank
750+
-----------
751+
0.0911891
752752
(1 row)
753753

754754
select rank(' a:1 s:2B d g'::tsvector, 'a | s');
755-
rank
756-
------
757-
0.46
755+
rank
756+
----------
757+
0.151982
758758
(1 row)
759759

760760
select rank(' a:1 s:2 d g'::tsvector, 'a | s');
761-
rank
762-
------
763-
0.19
761+
rank
762+
-----------
763+
0.0607927
764764
(1 row)
765765

766766
select rank(' a:1 s:2C d g'::tsvector, 'a & s');

contrib/tsearch2/query.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Datum to_tsquery_current(PG_FUNCTION_ARGS);
5555
/* parser's states */
5656
#define WAITOPERAND 1
5757
#define WAITOPERATOR 2
58+
#define WAITFIRSTOPERAND 3
5859

5960
/*
6061
* node of query tree, also used
@@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
137138
{
138139
switch (state->state)
139140
{
141+
case WAITFIRSTOPERAND:
140142
case WAITOPERAND:
141143
if (*(state->buf) == '!')
142144
{
@@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
159161
else if (*(state->buf) != ' ')
160162
{
161163
state->valstate.prsbuf = state->buf;
162-
state->state = WAITOPERATOR;
163164
if (gettoken_tsvector(&(state->valstate)))
164165
{
165166
*strval = state->valstate.word;
166167
*lenval = state->valstate.curpos - state->valstate.word;
167168
state->buf = get_weight(state->valstate.prsbuf, weight);
169+
state->state = WAITOPERATOR;
168170
return VAL;
169171
}
172+
else if ( state->state == WAITFIRSTOPERAND )
173+
return END;
170174
else
171175
ereport(ERROR,
172176
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -596,7 +600,7 @@ static QUERYTYPE *
596600

597601
/* init state */
598602
state.buf = buf;
599-
state.state = WAITOPERAND;
603+
state.state = WAITFIRSTOPERAND;
600604
state.count = 0;
601605
state.num = 0;
602606
state.str = NULL;
@@ -616,10 +620,13 @@ static QUERYTYPE *
616620
/* parse query & make polish notation (postfix, but in reverse order) */
617621
makepol(&state, pushval);
618622
pfree(state.valstate.word);
619-
if (!state.num)
620-
ereport(ERROR,
621-
(errcode(ERRCODE_SYNTAX_ERROR),
622-
errmsg("empty query")));
623+
if (!state.num) {
624+
elog(NOTICE, "Query doesn't contain lexem(s)");
625+
query = (QUERYTYPE*)palloc( HDRSIZEQT );
626+
query->len = HDRSIZEQT;
627+
query->size = 0;
628+
return query;
629+
}
623630

624631
/* make finish struct */
625632
commonlen = COMPUTESIZE(state.num, state.sumlen);
@@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS)
905912
PG_FREE_IF_COPY(in, 1);
906913

907914
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
915+
916+
if ( query->size == 0 )
917+
PG_RETURN_POINTER(query);
918+
908919
res = clean_fakeval_v2(GETQUERY(query), &len);
909920
if (!res)
910921
{

contrib/tsearch2/rank.c

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
257257
int4 dimt,
258258
j,
259259
i;
260-
float res = -1.0;
260+
float res = 0.0;
261261
ITEM **item;
262262
int size = q->size;
263263

@@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
266266

267267
for (i = 0; i < size; i++)
268268
{
269+
float resj,wjm;
270+
int4 jm;
269271
entry = find_wordentry(t, q, item[i]);
270272
if (!entry)
271273
continue;
@@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
281283
post = POSNULL + 1;
282284
}
283285

284-
for (j = 0; j < dimt; j++)
285-
{
286-
if (res < 0)
287-
res = wpos(post[j]);
288-
else
289-
res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j]));
290-
}
286+
resj = 0.0;
287+
wjm = -1.0;
288+
jm = 0;
289+
for (j = 0; j < dimt; j++)
290+
{
291+
resj = resj + wpos(post[j])/((j+1)*(j+1));
292+
if ( wpos(post[j]) > wjm ) {
293+
wjm = wpos(post[j]);
294+
jm = j;
295+
}
296+
}
297+
/*
298+
limit (sum(1/i^2),i->inf) = pi^2/6
299+
resj = sum(wi/i^2), i = 1..noccurrences,
300+
wi - should be sorted desc,
301+
don't sort for now, just choose maximum weight. This should be corrected
302+
Oleg Bartunov
303+
*/
304+
res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685;
291305
}
306+
res = res /size;
292307
pfree(item);
293308
return res;
294309
}

0 commit comments

Comments
 (0)