13
13
#include "catalog/pg_type.h"
14
14
#include "tsearch/ts_type.h"
15
15
#include "tsearch/ts_utils.h"
16
+ #include "utils/builtins.h"
16
17
17
18
#include "rum.h"
18
19
19
20
#include <math.h>
20
21
21
- PG_FUNCTION_INFO_V1 (gin_tsvector_config );
22
- PG_FUNCTION_INFO_V1 (gin_tsquery_pre_consistent );
23
- PG_FUNCTION_INFO_V1 (gin_tsquery_distance );
22
+ PG_FUNCTION_INFO_V1 (rum_extract_tsvector );
23
+ PG_FUNCTION_INFO_V1 (rum_extract_tsquery );
24
+ PG_FUNCTION_INFO_V1 (rum_tsvector_config );
25
+ PG_FUNCTION_INFO_V1 (rum_tsquery_pre_consistent );
26
+ PG_FUNCTION_INFO_V1 (rum_tsquery_distance );
24
27
25
28
static float calc_rank_and (float * w , Datum * addInfo , bool * addInfoIsNull ,
26
29
int size );
@@ -53,7 +56,7 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
53
56
}
54
57
55
58
Datum
56
- gin_tsquery_pre_consistent (PG_FUNCTION_ARGS )
59
+ rum_tsquery_pre_consistent (PG_FUNCTION_ARGS )
57
60
{
58
61
bool * check = (bool * ) PG_GETARG_POINTER (0 );
59
62
@@ -95,6 +98,7 @@ static WordEntryPosVector POSNULL = {
95
98
{0 }
96
99
};
97
100
101
+ #define SIXTHBIT 0x20
98
102
#define LOWERMASK 0x1F
99
103
100
104
/*
@@ -109,6 +113,38 @@ word_distance(int32 w)
109
113
return 1.0 / (1.005 + 0.05 * exp (((float4 ) w ) / 1.5 - 2 ));
110
114
}
111
115
116
+ static int
117
+ compress_pos (char * target , uint16 * pos , int npos )
118
+ {
119
+ int i ;
120
+ uint16 prev = 0 , delta ;
121
+ char * ptr ;
122
+
123
+ ptr = target ;
124
+ for (i = 0 ; i < npos ; i ++ )
125
+ {
126
+ delta = WEP_GETPOS (pos [i ]) - WEP_GETPOS (prev );
127
+
128
+ while (true)
129
+ {
130
+ if (delta >= SIXTHBIT )
131
+ {
132
+ * ptr = (delta & (~HIGHBIT )) | HIGHBIT ;
133
+ ptr ++ ;
134
+ delta >>= 7 ;
135
+ }
136
+ else
137
+ {
138
+ * ptr = delta | (WEP_GETWEIGHT (pos [i ]) << 5 );
139
+ ptr ++ ;
140
+ break ;
141
+ }
142
+ }
143
+ prev = pos [i ];
144
+ }
145
+ return ptr - target ;
146
+ }
147
+
112
148
static char *
113
149
decompress_pos (char * ptr , uint16 * pos )
114
150
{
@@ -293,7 +329,220 @@ calc_rank(float *w, TSQuery q, Datum *addInfo, bool *addInfoIsNull, int size)
293
329
}
294
330
295
331
Datum
296
- gin_tsquery_distance (PG_FUNCTION_ARGS )
332
+ rum_extract_tsvector (PG_FUNCTION_ARGS )
333
+ {
334
+ TSVector vector = PG_GETARG_TSVECTOR (0 );
335
+ int32 * nentries = (int32 * ) PG_GETARG_POINTER (1 );
336
+ Datum * * addInfo = (Datum * * ) PG_GETARG_POINTER (3 );
337
+ bool * * addInfoIsNull = (bool * * ) PG_GETARG_POINTER (4 );
338
+ Datum * entries = NULL ;
339
+
340
+ * nentries = vector -> size ;
341
+ if (vector -> size > 0 )
342
+ {
343
+ int i ;
344
+ WordEntry * we = ARRPTR (vector );
345
+ WordEntryPosVector * posVec ;
346
+
347
+ entries = (Datum * ) palloc (sizeof (Datum ) * vector -> size );
348
+ * addInfo = (Datum * ) palloc (sizeof (Datum ) * vector -> size );
349
+ * addInfoIsNull = (bool * ) palloc (sizeof (bool ) * vector -> size );
350
+
351
+ for (i = 0 ; i < vector -> size ; i ++ )
352
+ {
353
+ text * txt ;
354
+ bytea * posData ;
355
+ int posDataSize ;
356
+
357
+ txt = cstring_to_text_with_len (STRPTR (vector ) + we -> pos , we -> len );
358
+ entries [i ] = PointerGetDatum (txt );
359
+
360
+ if (we -> haspos )
361
+ {
362
+ posVec = _POSVECPTR (vector , we );
363
+ posDataSize = VARHDRSZ + 2 * posVec -> npos * sizeof (WordEntryPos );
364
+ posData = (bytea * )palloc (posDataSize );
365
+ posDataSize = compress_pos (posData -> vl_dat , posVec -> pos , posVec -> npos ) + VARHDRSZ ;
366
+ SET_VARSIZE (posData , posDataSize );
367
+
368
+ (* addInfo )[i ] = PointerGetDatum (posData );
369
+ (* addInfoIsNull )[i ] = false;
370
+ }
371
+ else
372
+ {
373
+ (* addInfo )[i ] = (Datum )0 ;
374
+ (* addInfoIsNull )[i ] = true;
375
+ }
376
+ we ++ ;
377
+ }
378
+ }
379
+
380
+ PG_FREE_IF_COPY (vector , 0 );
381
+ PG_RETURN_POINTER (entries );
382
+ }
383
+
384
+ /*
385
+ * sort QueryOperands by (length, word)
386
+ */
387
+ static int
388
+ compareQueryOperand (const void * a , const void * b , void * arg )
389
+ {
390
+ char * operand = (char * ) arg ;
391
+ QueryOperand * qa = (* (QueryOperand * const * ) a );
392
+ QueryOperand * qb = (* (QueryOperand * const * ) b );
393
+
394
+ return tsCompareString (operand + qa -> distance , qa -> length ,
395
+ operand + qb -> distance , qb -> length ,
396
+ false);
397
+ }
398
+
399
+ /*
400
+ * Returns a sorted, de-duplicated array of QueryOperands in a query.
401
+ * The returned QueryOperands are pointers to the original QueryOperands
402
+ * in the query.
403
+ *
404
+ * Length of the returned array is stored in *size
405
+ */
406
+ static QueryOperand * *
407
+ SortAndUniqItems (TSQuery q , int * size )
408
+ {
409
+ char * operand = GETOPERAND (q );
410
+ QueryItem * item = GETQUERY (q );
411
+ QueryOperand * * res ,
412
+ * * ptr ,
413
+ * * prevptr ;
414
+
415
+ ptr = res = (QueryOperand * * ) palloc (sizeof (QueryOperand * ) * * size );
416
+
417
+ /* Collect all operands from the tree to res */
418
+ while ((* size )-- )
419
+ {
420
+ if (item -> type == QI_VAL )
421
+ {
422
+ * ptr = (QueryOperand * ) item ;
423
+ ptr ++ ;
424
+ }
425
+ item ++ ;
426
+ }
427
+
428
+ * size = ptr - res ;
429
+ if (* size < 2 )
430
+ return res ;
431
+
432
+ qsort_arg (res , * size , sizeof (QueryOperand * ), compareQueryOperand , (void * ) operand );
433
+
434
+ ptr = res + 1 ;
435
+ prevptr = res ;
436
+
437
+ /* remove duplicates */
438
+ while (ptr - res < * size )
439
+ {
440
+ if (compareQueryOperand ((void * ) ptr , (void * ) prevptr , (void * ) operand ) != 0 )
441
+ {
442
+ prevptr ++ ;
443
+ * prevptr = * ptr ;
444
+ }
445
+ ptr ++ ;
446
+ }
447
+
448
+ * size = prevptr + 1 - res ;
449
+ return res ;
450
+ }
451
+
452
+ Datum
453
+ rum_extract_tsquery (PG_FUNCTION_ARGS )
454
+ {
455
+ TSQuery query = PG_GETARG_TSQUERY (0 );
456
+ int32 * nentries = (int32 * ) PG_GETARG_POINTER (1 );
457
+
458
+ /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
459
+ bool * * ptr_partialmatch = (bool * * ) PG_GETARG_POINTER (3 );
460
+ Pointer * * extra_data = (Pointer * * ) PG_GETARG_POINTER (4 );
461
+
462
+ /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
463
+ int32 * searchMode = (int32 * ) PG_GETARG_POINTER (6 );
464
+ Datum * entries = NULL ;
465
+
466
+ * nentries = 0 ;
467
+
468
+ if (query -> size > 0 )
469
+ {
470
+ QueryItem * item = GETQUERY (query );
471
+ int32 i ,
472
+ j ;
473
+ bool * partialmatch ;
474
+ int * map_item_operand ;
475
+ char * operand = GETOPERAND (query );
476
+ QueryOperand * * operands ;
477
+
478
+ /*
479
+ * If the query doesn't have any required positive matches (for
480
+ * instance, it's something like '! foo'), we have to do a full index
481
+ * scan.
482
+ */
483
+ if (tsquery_requires_match (item ))
484
+ * searchMode = GIN_SEARCH_MODE_DEFAULT ;
485
+ else
486
+ * searchMode = GIN_SEARCH_MODE_ALL ;
487
+
488
+ * nentries = query -> size ;
489
+ operands = SortAndUniqItems (query , nentries );
490
+
491
+ entries = (Datum * ) palloc (sizeof (Datum ) * (* nentries ));
492
+ partialmatch = * ptr_partialmatch = (bool * ) palloc (sizeof (bool ) * (* nentries ));
493
+
494
+ /*
495
+ * Make map to convert item's number to corresponding operand's (the
496
+ * same, entry's) number. Entry's number is used in check array in
497
+ * consistent method. We use the same map for each entry.
498
+ */
499
+ * extra_data = (Pointer * ) palloc (sizeof (Pointer ) * (* nentries ));
500
+ map_item_operand = (int * ) palloc0 (sizeof (int ) * query -> size );
501
+
502
+ for (i = 0 ; i < (* nentries ); i ++ )
503
+ {
504
+ text * txt ;
505
+
506
+ txt = cstring_to_text_with_len (GETOPERAND (query ) + operands [i ]-> distance ,
507
+ operands [i ]-> length );
508
+ entries [i ] = PointerGetDatum (txt );
509
+ partialmatch [i ] = operands [i ]-> prefix ;
510
+ (* extra_data )[i ] = (Pointer ) map_item_operand ;
511
+ }
512
+
513
+ /* Now rescan the VAL items and fill in the arrays */
514
+ for (j = 0 ; j < query -> size ; j ++ )
515
+ {
516
+ if (item [j ].type == QI_VAL )
517
+ {
518
+ QueryOperand * val = & item [j ].qoperand ;
519
+ bool found = false;
520
+
521
+ for (i = 0 ; i < (* nentries ); i ++ )
522
+ {
523
+ if (!tsCompareString (operand + operands [i ]-> distance , operands [i ]-> length ,
524
+ operand + val -> distance , val -> length ,
525
+ false))
526
+ {
527
+ map_item_operand [j ] = i ;
528
+ found = true;
529
+ break ;
530
+ }
531
+ }
532
+
533
+ if (!found )
534
+ elog (ERROR , "Operand not found!" );
535
+ }
536
+ }
537
+ }
538
+
539
+ PG_FREE_IF_COPY (query , 0 );
540
+
541
+ PG_RETURN_POINTER (entries );
542
+ }
543
+
544
+ Datum
545
+ rum_tsquery_distance (PG_FUNCTION_ARGS )
297
546
{
298
547
/* bool *check = (bool *) PG_GETARG_POINTER(0); */
299
548
@@ -312,7 +561,7 @@ gin_tsquery_distance(PG_FUNCTION_ARGS)
312
561
}
313
562
314
563
Datum
315
- gin_tsvector_config (PG_FUNCTION_ARGS )
564
+ rum_tsvector_config (PG_FUNCTION_ARGS )
316
565
{
317
566
GinConfig * config = (GinConfig * )PG_GETARG_POINTER (0 );
318
567
config -> addInfoTypeOid = BYTEAOID ;
0 commit comments