Skip to content

Commit 6967b76

Browse files
committed
Phrase search: renovate 0.13 at Aug 12, 2009
1 parent c991729 commit 6967b76

24 files changed

+1283
-308
lines changed

src/backend/tsearch/to_tsany.c

Lines changed: 61 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ to_tsvector(PG_FUNCTION_ARGS)
253253
* to_tsquery
254254
*/
255255

256+
typedef struct MorphOpaque
257+
{
258+
Oid cfg_id;
259+
int operator;
260+
} MorphOpaque;
256261

257262
/*
258263
* This function is used for morph parsing.
@@ -270,25 +275,37 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
270275
int32 count = 0;
271276
ParsedText prs;
272277
uint32 variant,
273-
pos,
278+
pos = 0,
274279
cntvar = 0,
275280
cntpos = 0,
276281
cnt = 0;
277-
Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually
278-
* an Oid, not a pointer */
282+
MorphOpaque *data = (MorphOpaque*)DatumGetPointer(opaque);
279283

280284
prs.lenwords = 4;
281285
prs.curwords = 0;
282286
prs.pos = 0;
283287
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
284288

285-
parsetext(cfg_id, &prs, strval, lenval);
289+
parsetext(data->cfg_id, &prs, strval, lenval);
286290

287291
if (prs.curwords > 0)
288292
{
289293

290294
while (count < prs.curwords)
291295
{
296+
if ( pos > 0 && pos+1 < prs.words[count].pos.pos )
297+
{
298+
while( pos+1 < prs.words[count].pos.pos )
299+
{
300+
/* push a placeholder for each stop word */
301+
pushStop(state);
302+
if (cntpos)
303+
pushOperator(state, data->operator, 1);
304+
cntpos++;
305+
pos++;
306+
}
307+
}
308+
292309
pos = prs.words[count].pos.pos;
293310
cntvar = 0;
294311
while (count < prs.curwords && pos == prs.words[count].pos.pos)
@@ -303,24 +320,22 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
303320
((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
304321
pfree(prs.words[count].word);
305322
if (cnt)
306-
pushOperator(state, OP_AND);
323+
pushOperator(state, OP_AND, 0);
307324
cnt++;
308325
count++;
309326
}
310327

311328
if (cntvar)
312-
pushOperator(state, OP_OR);
329+
pushOperator(state, OP_OR, 0);
313330
cntvar++;
314331
}
315332

316333
if (cntpos)
317-
pushOperator(state, OP_AND);
318-
334+
pushOperator(state, data->operator, 1);
319335
cntpos++;
320336
}
321337

322338
pfree(prs.words);
323-
324339
}
325340
else
326341
pushStop(state);
@@ -329,44 +344,15 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
329344
Datum
330345
to_tsquery_byid(PG_FUNCTION_ARGS)
331346
{
332-
Oid cfgid = PG_GETARG_OID(0);
333347
text *in = PG_GETARG_TEXT_P(1);
334348
TSQuery query;
335-
QueryItem *res;
336-
int32 len;
337-
338-
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
349+
MorphOpaque data;
339350

340-
if (query->size == 0)
341-
PG_RETURN_TSQUERY(query);
351+
data.cfg_id = PG_GETARG_OID(0);
352+
data.operator = OP_AND;
342353

343-
/* clean out any stopword placeholders from the tree */
344-
res = clean_fakeval(GETQUERY(query), &len);
345-
if (!res)
346-
{
347-
SET_VARSIZE(query, HDRSIZETQ);
348-
query->size = 0;
349-
PG_RETURN_POINTER(query);
350-
}
351-
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
354+
query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), false);
352355

353-
/*
354-
* Removing the stopword placeholders might've resulted in fewer
355-
* QueryItems. If so, move the operands up accordingly.
356-
*/
357-
if (len != query->size)
358-
{
359-
char *oldoperand = GETOPERAND(query);
360-
int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
361-
362-
Assert(len < query->size);
363-
364-
query->size = len;
365-
memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
366-
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
367-
}
368-
369-
pfree(res);
370356
PG_RETURN_TSQUERY(query);
371357
}
372358

@@ -385,44 +371,15 @@ to_tsquery(PG_FUNCTION_ARGS)
385371
Datum
386372
plainto_tsquery_byid(PG_FUNCTION_ARGS)
387373
{
388-
Oid cfgid = PG_GETARG_OID(0);
389374
text *in = PG_GETARG_TEXT_P(1);
390375
TSQuery query;
391-
QueryItem *res;
392-
int32 len;
393-
394-
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
395-
396-
if (query->size == 0)
397-
PG_RETURN_TSQUERY(query);
398-
399-
/* clean out any stopword placeholders from the tree */
400-
res = clean_fakeval(GETQUERY(query), &len);
401-
if (!res)
402-
{
403-
SET_VARSIZE(query, HDRSIZETQ);
404-
query->size = 0;
405-
PG_RETURN_POINTER(query);
406-
}
407-
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
376+
MorphOpaque data;
408377

409-
/*
410-
* Removing the stopword placeholders might've resulted in fewer
411-
* QueryItems. If so, move the operands up accordingly.
412-
*/
413-
if (len != query->size)
414-
{
415-
char *oldoperand = GETOPERAND(query);
416-
int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
378+
data.cfg_id = PG_GETARG_OID(0);
379+
data.operator = OP_AND;
417380

418-
Assert(len < query->size);
381+
query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), true);
419382

420-
query->size = len;
421-
memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
422-
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
423-
}
424-
425-
pfree(res);
426383
PG_RETURN_POINTER(query);
427384
}
428385

@@ -437,3 +394,32 @@ plainto_tsquery(PG_FUNCTION_ARGS)
437394
ObjectIdGetDatum(cfgId),
438395
PointerGetDatum(in)));
439396
}
397+
398+
399+
Datum
400+
phraseto_tsquery_byid(PG_FUNCTION_ARGS)
401+
{
402+
text *in = PG_GETARG_TEXT_P(1);
403+
TSQuery query;
404+
MorphOpaque data;
405+
406+
data.cfg_id = PG_GETARG_OID(0);
407+
data.operator = OP_PHRASE;
408+
409+
query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), true);
410+
411+
PG_RETURN_TSQUERY(query);
412+
}
413+
414+
Datum
415+
phraseto_tsquery(PG_FUNCTION_ARGS)
416+
{
417+
text *in = PG_GETARG_TEXT_P(0);
418+
Oid cfgId;
419+
420+
cfgId = getTSCurrentConfig(true);
421+
PG_RETURN_DATUM(DirectFunctionCall2(phraseto_tsquery_byid,
422+
ObjectIdGetDatum(cfgId),
423+
PointerGetDatum(in)));
424+
}
425+

src/backend/tsearch/ts_parse.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
454454
}
455455

456456
static void
457-
hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
457+
hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
458458
{
459459
int i;
460460
QueryItem *item = GETQUERY(query);
@@ -467,6 +467,7 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
467467
}
468468

469469
word = &(prs->words[prs->curwords - 1]);
470+
word->pos = LIMITPOS(pos);
470471
for (i = 0; i < query->size; i++)
471472
{
472473
if (item->type == QI_VAL &&
@@ -492,6 +493,7 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
492493
{
493494
ParsedLex *tmplexs;
494495
TSLexeme *ptr;
496+
int32 savedpos;
495497

496498
while (lexs)
497499
{
@@ -500,9 +502,12 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
500502
hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
501503

502504
ptr = norms;
505+
savedpos = prs->vectorpos;
503506
while (ptr && ptr->lexeme)
504507
{
505-
hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
508+
if (ptr->flags & TSL_ADDPOS)
509+
savedpos++;
510+
hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
506511
ptr++;
507512
}
508513

@@ -516,6 +521,8 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
516521
ptr = norms;
517522
while (ptr->lexeme)
518523
{
524+
if (ptr->flags & TSL_ADDPOS)
525+
prs->vectorpos++;
519526
pfree(ptr->lexeme);
520527
ptr++;
521528
}
@@ -575,7 +582,10 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
575582
do
576583
{
577584
if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
585+
{
586+
prs->vectorpos++;
578587
addHLParsedLex(prs, query, lexs, norms);
588+
}
579589
else
580590
addHLParsedLex(prs, query, lexs, NULL);
581591
} while (norms);

src/backend/tsearch/ts_selfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
400400
lookup, length, minfreq);
401401
break;
402402

403+
case OP_PHRASE:
403404
case OP_AND:
404405
s1 = tsquery_opr_selec(item + 1, operand,
405406
lookup, length, minfreq);

src/backend/tsearch/wparser_def.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,15 +2027,36 @@ typedef struct
20272027
} hlCheck;
20282028

20292029
static bool
2030-
checkcondition_HL(void *checkval, QueryOperand *val)
2030+
checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
20312031
{
20322032
int i;
2033+
hlCheck *checkval = (hlCheck *) opaque;
20332034

2034-
for (i = 0; i < ((hlCheck *) checkval)->len; i++)
2035+
for (i = 0; i < checkval->len; i++)
20352036
{
2036-
if (((hlCheck *) checkval)->words[i].item == val)
2037-
return true;
2037+
if (checkval->words[i].item == val)
2038+
{
2039+
/* caller doesn't need positions, so the first match is enough */
2040+
if (!data)
2041+
return true;
2042+
2043+
if (!data->pos)
2044+
{
2045+
data->pos = palloc(sizeof(WordEntryPos) * checkval->len);
2046+
data->allocated = true;
2047+
data->npos = 1;
2048+
data->pos[0] = checkval->words[i].pos;
2049+
}
2050+
else if (data->pos[data->npos-1] < checkval->words[i].pos)
2051+
{
2052+
data->pos[data->npos++] = checkval->words[i].pos;
2053+
}
2054+
}
20382055
}
2056+
2057+
if ( data && data->npos > 0 )
2058+
return true;
2059+
20392060
return false;
20402061
}
20412062

src/backend/utils/adt/tsginidx.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,13 +179,16 @@ typedef struct
179179
} GinChkVal;
180180

181181
static GinTernaryValue
182-
checkcondition_gin(void *checkval, QueryOperand *val)
182+
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
183183
{
184184
GinChkVal *gcv = (GinChkVal *) checkval;
185185
int j;
186186

187-
/* if any val requiring a weight is used, set recheck flag */
188-
if (val->weight != 0)
187+
/*
188+
* if any val requiring a weight is used, or the caller
189+
* needs position information, then set the recheck flag
190+
*/
191+
if (val->weight != 0 || data != NULL)
189192
*(gcv->need_recheck) = true;
190193

191194
/* convert item's number to corresponding entry's (operand's) number */

src/backend/utils/adt/tsgistidx.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ typedef struct
298298
* is there value 'val' in array or not ?
299299
*/
300300
static bool
301-
checkcondition_arr(void *checkval, QueryOperand *val)
301+
checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
302302
{
303303
int32 *StopLow = ((CHKVAL *) checkval)->arrb;
304304
int32 *StopHigh = ((CHKVAL *) checkval)->arre;
@@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val)
327327
}
328328

329329
static bool
330-
checkcondition_bit(void *checkval, QueryOperand *val)
330+
checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
331331
{
332332
/*
333333
* we are not able to find a prefix in signature tree

0 commit comments

Comments
 (0)