Skip to content

Commit 00ceebc

Browse files
author
Artur Zakirov
committed
Added:
- rum_distance_query type, with a cast from tsquery to rum_distance_query - rum_ts_distance() overloads that calculate distance with a normalization parameter - <=> operator overload
1 parent c63d4ea commit 00ceebc

File tree

5 files changed

+166
-5
lines changed

5 files changed

+166
-5
lines changed

expected/rum.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,29 @@ SELECT
150150
57.5727 | 57.5727 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
151151
(2 rows)
152152

153+
-- Check ranking normalization
154+
SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), *
155+
FROM test_rum
156+
WHERE a @@ to_tsquery('pg_catalog.english', 'way')
157+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way');
158+
rum_ts_distance | t | a
159+
-----------------+--------------------------------------------------------------------------+---------------------------------------------------------------
160+
16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9
161+
16.4493 | itself. Put on your “specs” and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12
162+
16.4493 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2
163+
16.4493 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
164+
(4 rows)
165+
166+
SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), *
167+
FROM test_rum
168+
WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)')
169+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)');
170+
rum_ts_distance | t | a
171+
-----------------+---------------------------------------------------------------------+---------------------------------------------------------
172+
8.22467 | itself. Put on your “specs” and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12
173+
57.5727 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
174+
(2 rows)
175+
153176
INSERT INTO test_rum (t) VALUES ('foo bar foo the over foo qq bar');
154177
INSERT INTO test_rum (t) VALUES ('345 qwerty copyright');
155178
INSERT INTO test_rum (t) VALUES ('345 qwerty');

rum--1.0.sql

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,31 @@ LANGUAGE C;
66
-- Access method
77
CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler;
88

9-
-- Opclasses
9+
-- tsvector opclasses
10+
11+
CREATE TYPE rum_distance_query AS (query tsquery, method int);
12+
13+
CREATE FUNCTION tsquery_to_distance_query(tsquery)
14+
RETURNS rum_distance_query
15+
AS 'MODULE_PATHNAME', 'tsquery_to_distance_query'
16+
LANGUAGE C IMMUTABLE STRICT;
17+
18+
CREATE CAST (tsquery AS rum_distance_query)
19+
WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT;
20+
1021
CREATE FUNCTION rum_ts_distance(tsvector,tsquery)
1122
RETURNS float4
12-
AS 'MODULE_PATHNAME'
23+
AS 'MODULE_PATHNAME', 'rum_ts_distance_tt'
24+
LANGUAGE C IMMUTABLE STRICT;
25+
26+
CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int)
27+
RETURNS float4
28+
AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf'
29+
LANGUAGE C IMMUTABLE STRICT;
30+
31+
CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query)
32+
RETURNS float4
33+
AS 'MODULE_PATHNAME', 'rum_ts_distance_td'
1334
LANGUAGE C IMMUTABLE STRICT;
1435

1536
CREATE OPERATOR <=> (
@@ -18,6 +39,12 @@ CREATE OPERATOR <=> (
1839
PROCEDURE = rum_ts_distance
1940
);
2041

42+
CREATE OPERATOR <=> (
43+
LEFTARG = tsvector,
44+
RIGHTARG = rum_distance_query,
45+
PROCEDURE = rum_ts_distance
46+
);
47+
2148
CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal)
2249
RETURNS internal
2350
AS 'MODULE_PATHNAME'

rum.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,12 @@ extern Datum rum_extract_tsquery(PG_FUNCTION_ARGS);
767767
extern Datum rum_tsvector_config(PG_FUNCTION_ARGS);
768768
extern Datum rum_tsquery_pre_consistent(PG_FUNCTION_ARGS);
769769
extern Datum rum_tsquery_distance(PG_FUNCTION_ARGS);
770-
extern Datum rum_ts_distance(PG_FUNCTION_ARGS);
770+
extern Datum rum_ts_distance_tt(PG_FUNCTION_ARGS);
771+
extern Datum rum_ts_distance_ttf(PG_FUNCTION_ARGS);
772+
extern Datum rum_ts_distance_td(PG_FUNCTION_ARGS);
773+
774+
extern Datum tsquery_to_distance_query(PG_FUNCTION_ARGS);
775+
771776

772777
/* GUC parameters */
773778
extern PGDLLIMPORT int RumFuzzySearchLimit;

rum_ts_utils.c

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@
1111

1212
#include "postgres.h"
1313

14+
#include "access/htup_details.h"
1415
#include "catalog/pg_type.h"
16+
#include "funcapi.h"
1517
#include "miscadmin.h"
1618
#include "tsearch/ts_type.h"
1719
#include "tsearch/ts_utils.h"
1820
#include "utils/array.h"
1921
#include "utils/builtins.h"
22+
#include "utils/typcache.h"
2023

2124
#include "rum.h"
2225

@@ -29,7 +32,11 @@ PG_FUNCTION_INFO_V1(rum_tsquery_pre_consistent);
2932
PG_FUNCTION_INFO_V1(rum_tsquery_consistent);
3033
PG_FUNCTION_INFO_V1(rum_tsquery_timestamp_consistent);
3134
PG_FUNCTION_INFO_V1(rum_tsquery_distance);
32-
PG_FUNCTION_INFO_V1(rum_ts_distance);
35+
PG_FUNCTION_INFO_V1(rum_ts_distance_tt);
36+
PG_FUNCTION_INFO_V1(rum_ts_distance_ttf);
37+
PG_FUNCTION_INFO_V1(rum_ts_distance_td);
38+
39+
PG_FUNCTION_INFO_V1(tsquery_to_distance_query);
3340

3441
static int count_pos(char *ptr, int len);
3542
static char *decompress_pos(char *ptr, uint16 *pos);
@@ -1182,7 +1189,7 @@ rum_tsquery_distance(PG_FUNCTION_ARGS)
11821189
}
11831190

11841191
Datum
1185-
rum_ts_distance(PG_FUNCTION_ARGS)
1192+
rum_ts_distance_tt(PG_FUNCTION_ARGS)
11861193
{
11871194
TSVector txt = PG_GETARG_TSVECTOR(0);
11881195
TSQuery query = PG_GETARG_TSQUERY(1);
@@ -1198,6 +1205,95 @@ rum_ts_distance(PG_FUNCTION_ARGS)
11981205
PG_RETURN_FLOAT4(1.0 / res);
11991206
}
12001207

1208+
Datum
1209+
rum_ts_distance_ttf(PG_FUNCTION_ARGS)
1210+
{
1211+
TSVector txt = PG_GETARG_TSVECTOR(0);
1212+
TSQuery query = PG_GETARG_TSQUERY(1);
1213+
int method = PG_GETARG_INT32(2);
1214+
float4 res;
1215+
1216+
res = calc_score(weights, txt, query, method);
1217+
1218+
PG_FREE_IF_COPY(txt, 0);
1219+
PG_FREE_IF_COPY(query, 1);
1220+
if (res == 0)
1221+
PG_RETURN_FLOAT4(get_float4_infinity());
1222+
else
1223+
PG_RETURN_FLOAT4(1.0 / res);
1224+
}
1225+
1226+
Datum
1227+
rum_ts_distance_td(PG_FUNCTION_ARGS)
1228+
{
1229+
TSVector txt = PG_GETARG_TSVECTOR(0);
1230+
HeapTupleHeader d = PG_GETARG_HEAPTUPLEHEADER(1);
1231+
1232+
Oid tupType = HeapTupleHeaderGetTypeId(d);
1233+
int32 tupTypmod = HeapTupleHeaderGetTypMod(d);
1234+
TupleDesc tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1235+
HeapTupleData tuple;
1236+
1237+
TSQuery query;
1238+
int method;
1239+
bool isnull;
1240+
float4 res;
1241+
1242+
tuple.t_len = HeapTupleHeaderGetDatumLength(d);
1243+
ItemPointerSetInvalid(&(tuple.t_self));
1244+
tuple.t_tableOid = InvalidOid;
1245+
tuple.t_data = d;
1246+
1247+
query = DatumGetTSQuery(fastgetattr(&tuple, 1, tupdesc, &isnull));
1248+
if (isnull)
1249+
{
1250+
ReleaseTupleDesc(tupdesc);
1251+
PG_FREE_IF_COPY(txt, 0);
1252+
PG_FREE_IF_COPY(d, 1);
1253+
elog(ERROR, "NULL query value is not allowed");
1254+
}
1255+
1256+
method = DatumGetInt32(fastgetattr(&tuple, 2, tupdesc, &isnull));
1257+
if (isnull)
1258+
method = 0;
1259+
1260+
res = calc_score(weights, txt, query, method);
1261+
1262+
ReleaseTupleDesc(tupdesc);
1263+
PG_FREE_IF_COPY(txt, 0);
1264+
PG_FREE_IF_COPY(d, 1);
1265+
1266+
if (res == 0)
1267+
PG_RETURN_FLOAT4(get_float4_infinity());
1268+
else
1269+
PG_RETURN_FLOAT4(1.0 / res);
1270+
}
1271+
1272+
Datum
1273+
tsquery_to_distance_query(PG_FUNCTION_ARGS)
1274+
{
1275+
TSQuery query = PG_GETARG_TSQUERY(0);
1276+
1277+
TupleDesc tupdesc;
1278+
HeapTuple htup;
1279+
Datum values[2];
1280+
bool nulls[2];
1281+
1282+
/* Build a tuple descriptor for our result type */
1283+
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1284+
elog(ERROR, "return type must be a row type");
1285+
1286+
tupdesc = BlessTupleDesc(tupdesc);
1287+
1288+
MemSet(nulls, 0, sizeof(nulls));
1289+
values[0] = TSQueryGetDatum(query);
1290+
values[1] = Int32GetDatum(DEF_NORM_METHOD);
1291+
1292+
htup = heap_form_tuple(tupdesc, values, nulls);
1293+
1294+
PG_RETURN_DATUM(HeapTupleGetDatum(htup));
1295+
}
1296+
12011297
Datum
12021298
rum_tsvector_config(PG_FUNCTION_ARGS)
12031299
{

sql/rum.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ SELECT
5252
FROM test_rum
5353
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2;
5454

55+
-- Check ranking normalization
56+
SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), *
57+
FROM test_rum
58+
WHERE a @@ to_tsquery('pg_catalog.english', 'way')
59+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way');
60+
SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), *
61+
FROM test_rum
62+
WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)')
63+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)');
64+
5565
INSERT INTO test_rum (t) VALUES ('foo bar foo the over foo qq bar');
5666
INSERT INTO test_rum (t) VALUES ('345 qwerty copyright');
5767
INSERT INTO test_rum (t) VALUES ('345 qwerty');

0 commit comments

Comments
 (0)