Skip to content

Commit 31b6d84

Browse files
committed
Prevent rank change in case of duplicate search terms
1 parent 5d50873 commit 31b6d84

File tree

1 file changed

+74
-18
lines changed

1 file changed

+74
-18
lines changed

contrib/tsearch2/rank.c

Lines changed: 74 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,8 @@ static float weights[] = {0.1, 0.2, 0.4, 1.0};
4343

4444
#define DEF_NORM_METHOD 0
4545

46+
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
47+
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
4648
/*
4749
* Returns a weight of a word collocation
4850
*/
@@ -112,6 +114,55 @@ find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
112114
return NULL;
113115
}
114116

117+
118+
/*
 * Base address of the query operand string that compareITEM() reads item
 * text from (item text lives at SortAndUniqOperand + item->distance).
 *
 * compareITEM() is passed to qsort() and therefore has the fixed two-argument
 * comparator signature; the operand base is handed to it through this
 * file-scope variable instead.  It must be assigned before any call to
 * compareITEM().
 */
static char * SortAndUniqOperand=NULL;
119+
120+
static int
121+
compareITEM( const void * a, const void * b ) {
122+
if ( (*(ITEM**)a)->length == (*(ITEM**)b)->length )
123+
return strncmp( SortAndUniqOperand + (*(ITEM**)a)->distance,
124+
SortAndUniqOperand + (*(ITEM**)b)->distance,
125+
(*(ITEM**)b)->length );
126+
127+
return ((*(ITEM**)a)->length > (*(ITEM**)b)->length) ? 1 : -1;
128+
}
129+
130+
/*
 * Build a sorted, duplicate-free array of pointers to the VAL items of a
 * query.
 *
 * operand: base of the operand string; item text is read from
 *          operand + item->distance by compareITEM().
 * item:    first element of an array of *size ITEMs.
 * size:    in  - number of ITEMs in item[];
 *          out - number of pointers stored in the returned array.
 *
 * Items whose type is not VAL are dropped.  Among the remaining items, those
 * that compareITEM() reports as equal (same length and same text) are
 * collapsed to a single entry.  When fewer than two VAL items are found the
 * array is returned as collected, without sorting.
 *
 * Returns a palloc'd array of pointers into item[]; callers release it with
 * pfree().  Side effect: assigns SortAndUniqOperand when sorting is needed.
 */
static ITEM **
SortAndUniqItems(char *operand, ITEM *item, int *size)
{
	ITEM	  **res,
			  **ptr,
			  **prevptr;
	int			nitems = *size;
	int			i;

	ptr = res = (ITEM **) palloc(sizeof(ITEM *) * nitems);

	/* Keep only value items; the other entries in item[] are skipped. */
	for (i = 0; i < nitems; i++)
	{
		if (item[i].type == VAL)
		{
			*ptr = &item[i];
			ptr++;
		}
	}

	*size = ptr - res;
	if (*size < 2)
		return res;

	/* compareITEM() picks up the operand base through this variable. */
	SortAndUniqOperand = operand;

	/* Array elements are ITEM *, so that is the element size qsort needs. */
	qsort(res, *size, sizeof(ITEM *), compareITEM);

	/*
	 * Compact in place: prevptr marks the last unique entry kept so far,
	 * ptr scans ahead.  Equal items are adjacent after sorting.
	 */
	ptr = res + 1;
	prevptr = res;

	while (ptr - res < *size)
	{
		if (compareITEM((void *) ptr, (void *) prevptr) != 0)
		{
			prevptr++;
			*prevptr = *ptr;
		}
		ptr++;
	}

	*size = prevptr + 1 - res;
	return res;
}
165+
115166
static WordEntryPos POSNULL[] = {
116167
0,
117168
0
@@ -120,7 +171,7 @@ static WordEntryPos POSNULL[] = {
120171
static float
121172
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
122173
{
123-
uint16 **pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
174+
uint16 **pos;
124175
int i,
125176
k,
126177
l,
@@ -132,19 +183,22 @@ calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
132183
lenct,
133184
dist;
134185
float res = -1.0;
135-
ITEM *item = GETQUERY(q);
136-
137-
memset(pos, 0, sizeof(uint16 **) * q->size);
186+
ITEM **item;
187+
int size = q->size;
188+
189+
item = SortAndUniqItems( GETOPERAND(q), GETQUERY(q), &size);
190+
if ( size < 2 ) {
191+
pfree(item);
192+
return calc_rank_or(w, t, q);
193+
}
194+
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
195+
memset(pos, 0, sizeof(uint16 *) * q->size);
138196
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
139-
WEP_SETPOS(POSNULL[1], MAXENTRYPOS-1);
197+
WEP_SETPOS(POSNULL[1], MAXENTRYPOS-1);
140198

141-
for (i = 0; i < q->size; i++)
199+
for (i = 0; i < size; i++)
142200
{
143-
144-
if (item[i].type != VAL)
145-
continue;
146-
147-
entry = find_wordentry(t, q, &(item[i]));
201+
entry = find_wordentry(t, q, item[i]);
148202
if (!entry)
149203
continue;
150204

@@ -181,6 +235,7 @@ calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
181235
}
182236
}
183237
pfree(pos);
238+
pfree(item);
184239
return res;
185240
}
186241

@@ -193,16 +248,15 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
193248
j,
194249
i;
195250
float res = -1.0;
196-
ITEM *item = GETQUERY(q);
251+
ITEM **item;
252+
int size = q->size;
197253

198254
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
255+
item = SortAndUniqItems( GETOPERAND(q), GETQUERY(q), &size);
199256

200-
for (i = 0; i < q->size; i++)
257+
for (i = 0; i < size; i++)
201258
{
202-
if (item[i].type != VAL)
203-
continue;
204-
205-
entry = find_wordentry(t, q, &(item[i]));
259+
entry = find_wordentry(t, q, item[i]);
206260
if (!entry)
207261
continue;
208262

@@ -225,6 +279,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
225279
res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j]));
226280
}
227281
}
282+
pfree( item );
228283
return res;
229284
}
230285

@@ -349,7 +404,7 @@ checkcondition_DR(void *checkval, ITEM * val)
349404

350405
while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
351406
{
352-
if (val == ptr->item)
407+
if ( val == ptr->item || compareITEM( &val, &(ptr->item) ) == 0 )
353408
return true;
354409
ptr++;
355410
}
@@ -439,6 +494,7 @@ Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int
439494
ch.doc = f;
440495
ch.len = (doc + lastpos) - f + 1;
441496
*pos = f - doc + 1;
497+
SortAndUniqOperand = GETOPERAND(query);
442498
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
443499
{
444500
/*

0 commit comments

Comments
 (0)