Skip to content

Commit ee27a4f

Browse files
committed
rum_tsquery_distance:
- use only one loop
- remove allocation of extra memory
1 parent 4caa336 commit ee27a4f

File tree

1 file changed

+57
-62
lines changed

1 file changed

+57
-62
lines changed

rum_ts_utils.c

Lines changed: 57 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -524,28 +524,30 @@ rum_extract_tsquery(PG_FUNCTION_ARGS)
524524
* reconstruct partial tsvector from set of index entries
525525
*/
526526
static TSVector
527-
rum_reconstruct_tsvector(bool *check, TSQuery query, int *map_item_operand,
527+
rum_reconstruct_tsvector(bool *check, int32 nkeys, TSQuery query,
528+
int *map_item_operand,
528529
Datum *addInfo, bool *addInfoIsNull)
529530
{
530531
TSVector tsv;
531-
int cntwords = 0;
532+
int nWords = 0, currentWord = 0;
532533
int i = 0;
533534
QueryItem *item = GETQUERY(query);
534535
char *operandData = GETOPERAND(query);
535-
struct
536-
{
537-
char *word;
538-
char *posptr;
539-
int32 npos;
540-
int32 wordlen;
541-
} *restoredWordEntry;
542536
int len = 0, totallen;
543537
WordEntry *ptr;
544538
char *str;
545539
int stroff;
546540

541+
for(i=0; i<nkeys; i++)
542+
if (check[i])
543+
nWords++;
544+
545+
totallen = CALCDATASIZE(nWords, nWords * 16 * sizeof(uint16)); /* estimation */
546+
tsv = palloc(totallen);
547+
tsv->size = nWords;
547548

548-
restoredWordEntry = palloc(sizeof(*restoredWordEntry) * query->size);
549+
str = STRPTR(tsv);
550+
stroff = 0;
549551

550552
/*
551553
* go through query to collect lexemes and add to them
@@ -560,84 +562,77 @@ rum_reconstruct_tsvector(bool *check, TSQuery query, int *map_item_operand,
560562

561563
if (check[keyN] == true)
562564
{
565+
int npos = 0;
566+
bytea *positions;
567+
563568
/*
564569
* entries could be repeated in tsquery, do not visit them twice
565570
* or more. Modifying of check array (entryRes) is safe
566571
*/
567572
check[keyN] = false;
568573

569-
restoredWordEntry[cntwords].word = operandData + item->qoperand.distance;
570-
restoredWordEntry[cntwords].wordlen = item->qoperand.length;
571-
572574
len += item->qoperand.length;
573575

574576
if (addInfoIsNull[keyN] == false)
575577
{
576-
bytea *positions = DatumGetByteaP(addInfo[keyN]);
578+
positions = DatumGetByteaP(addInfo[keyN]);
577579

578-
restoredWordEntry[cntwords].npos = count_pos(VARDATA_ANY(positions),
579-
VARSIZE_ANY_EXHDR(positions));
580-
restoredWordEntry[cntwords].posptr = VARDATA_ANY(positions);
580+
npos = count_pos(VARDATA_ANY(positions),
581+
VARSIZE_ANY_EXHDR(positions));
581582

582583
len = SHORTALIGN(len);
583-
len += sizeof(uint16) +
584-
restoredWordEntry[cntwords].npos * sizeof(WordEntryPos);
584+
len += sizeof(uint16) + npos * sizeof(WordEntryPos);
585585
}
586-
else
586+
587+
while(CALCDATASIZE(nWords, len) > totallen)
587588
{
588-
restoredWordEntry[cntwords].npos = 0;
589+
totallen *= 2;
590+
tsv = repalloc(tsv, totallen);
591+
str = STRPTR(tsv);
589592
}
590593

591-
cntwords++;
592-
}
593-
}
594-
item++;
595-
}
594+
ptr = ARRPTR(tsv) + currentWord;
596595

597-
totallen = CALCDATASIZE(cntwords, len);
598-
tsv = palloc(totallen);
599-
SET_VARSIZE(tsv, totallen);
600-
tsv->size = cntwords;
601-
602-
ptr = ARRPTR(tsv);
603-
str = STRPTR(tsv);
604-
stroff = 0;
596+
ptr->len = item->qoperand.length;
597+
ptr->pos = stroff;
598+
memcpy(str + stroff, operandData + item->qoperand.distance, ptr->len);
599+
stroff += ptr->len;
605600

606-
for (i=0; i<cntwords; i++)
607-
{
608-
ptr->len = restoredWordEntry[i].wordlen;
609-
ptr->pos = stroff;
610-
memcpy(str + stroff, restoredWordEntry[i].word, ptr->len);
611-
stroff += ptr->len;
601+
if (npos)
602+
{
603+
WordEntryPos *wptr,
604+
posv = 0;
605+
int j;
606+
char *posptr = VARDATA_ANY(positions);
612607

613-
if (restoredWordEntry[i].npos)
614-
{
615-
WordEntryPos *wptr,
616-
post = 0;
617-
int j;
608+
ptr->haspos = 1;
618609

619-
ptr->haspos = 1;
610+
stroff = SHORTALIGN(stroff);
611+
*(uint16 *) (str + stroff) = npos;
612+
wptr = POSDATAPTR(tsv, ptr);
620613

621-
stroff = SHORTALIGN(stroff);
622-
*(uint16 *) (str + stroff) = restoredWordEntry[i].npos;
623-
wptr = POSDATAPTR(tsv, ptr);
614+
for (j=0; j<npos; j++)
615+
{
616+
posptr = decompress_pos(posptr, &posv);
617+
wptr[j] = posv;
618+
}
619+
stroff += sizeof(uint16) + npos * sizeof(WordEntryPos);
620+
}
621+
else
622+
{
623+
ptr->haspos = 0;
624+
}
624625

625-
for (j=0; j<restoredWordEntry[i].npos; j++)
626-
{
627-
restoredWordEntry[i].posptr = decompress_pos(restoredWordEntry[i].posptr, &post);
628-
wptr[j] = post;
626+
currentWord++;
629627
}
630-
stroff += sizeof(uint16) + restoredWordEntry[i].npos * sizeof(WordEntryPos);
631-
}
632-
else
633-
{
634-
ptr->haspos = 0;
635628
}
636629

637-
ptr++;
630+
item++;
638631
}
639632

640-
pfree(restoredWordEntry);
633+
Assert(nWords == currentWord);
634+
totallen = CALCDATASIZE(nWords, len);
635+
SET_VARSIZE(tsv, totallen);
641636

642637
return tsv;
643638
}
@@ -649,15 +644,15 @@ rum_tsquery_distance(PG_FUNCTION_ARGS)
649644

650645
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
651646
TSQuery query = PG_GETARG_TSQUERY(2);
652-
/* int32 nkeys = PG_GETARG_INT32(3); */
647+
int32 nkeys = PG_GETARG_INT32(3);
653648
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
654649
Datum *addInfo = (Datum *) PG_GETARG_POINTER(8);
655650
bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9);
656651
float8 res;
657652
int *map_item_operand = (int *) (extra_data[0]);
658653
TSVector tsv;
659654

660-
tsv = rum_reconstruct_tsvector(check, query, map_item_operand,
655+
tsv = rum_reconstruct_tsvector(check, nkeys, query, map_item_operand,
661656
addInfo, addInfoIsNull);
662657

663658
res = DatumGetFloat4(DirectFunctionCall2Coll(ts_rank_tt,

0 commit comments

Comments
 (0)