Skip to content

Commit 8a3631f

Browse files
committed
GIN: Generalized Inverted iNdex.
text[], int4[], Tsearch2 support for GIN.
1 parent 427c6b5 commit 8a3631f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+5871
-50
lines changed

contrib/tsearch2/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $
1+
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
22

33
MODULE_big = tsearch2
44
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
@@ -7,7 +7,7 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
77
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
88
tsvector_op.o rank.o ts_stat.o \
99
query_util.o query_support.o query_rewrite.o query_gist.o \
10-
ts_locale.o
10+
ts_locale.o ginidx.o
1111

1212
SUBDIRS := snowball ispell wordparser
1313
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)

contrib/tsearch2/expected/tsearch2.out

+39
Original file line numberDiff line numberDiff line change
@@ -3001,3 +3001,42 @@ select a is null, a from test_tsvector order by a;
30013001
t |
30023002
(514 rows)
30033003

3004+
drop index wowidx;
3005+
create index wowidx on test_tsvector using gin (a);
3006+
set enable_seqscan=off;
3007+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
3008+
count
3009+
-------
3010+
158
3011+
(1 row)
3012+
3013+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
3014+
count
3015+
-------
3016+
17
3017+
(1 row)
3018+
3019+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
3020+
count
3021+
-------
3022+
6
3023+
(1 row)
3024+
3025+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
3026+
count
3027+
-------
3028+
98
3029+
(1 row)
3030+
3031+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
3032+
count
3033+
-------
3034+
23
3035+
(1 row)
3036+
3037+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
3038+
count
3039+
-------
3040+
39
3041+
(1 row)
3042+

contrib/tsearch2/ginidx.c

+145
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#include "postgres.h"
2+
3+
#include <float.h>
4+
5+
#include "access/gist.h"
6+
#include "access/itup.h"
7+
#include "access/tuptoaster.h"
8+
#include "storage/bufpage.h"
9+
#include "utils/array.h"
10+
#include "utils/builtins.h"
11+
12+
#include "tsvector.h"
13+
#include "query.h"
14+
#include "query_cleanup.h"
15+
16+
PG_FUNCTION_INFO_V1(gin_extract_tsvector);
17+
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
18+
19+
Datum
20+
gin_extract_tsvector(PG_FUNCTION_ARGS) {
21+
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
22+
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
23+
Datum *entries = NULL;
24+
25+
*nentries = 0;
26+
if ( vector->size > 0 ) {
27+
int i;
28+
WordEntry *we = ARRPTR( vector );
29+
30+
*nentries = (uint32)vector->size;
31+
entries = (Datum*)palloc( sizeof(Datum) * vector->size );
32+
33+
for(i=0;i<vector->size;i++) {
34+
text *txt = (text*)palloc( VARHDRSZ + we->len );
35+
36+
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
37+
memcpy( VARDATA(txt), STRPTR( vector ) + we->pos, we->len );
38+
39+
entries[i] = PointerGetDatum( txt );
40+
41+
we++;
42+
}
43+
}
44+
45+
PG_FREE_IF_COPY(vector, 0);
46+
PG_RETURN_POINTER(entries);
47+
}
48+
49+
50+
PG_FUNCTION_INFO_V1(gin_extract_tsquery);
51+
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
52+
53+
Datum
54+
gin_extract_tsquery(PG_FUNCTION_ARGS) {
55+
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
56+
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
57+
StrategyNumber strategy = DatumGetUInt16( PG_GETARG_DATUM(2) );
58+
Datum *entries = NULL;
59+
60+
*nentries = 0;
61+
if ( query->size > 0 ) {
62+
int4 i, j=0, len;
63+
ITEM *item;
64+
65+
item = clean_NOT_v2(GETQUERY(query), &len);
66+
if ( !item )
67+
elog(ERROR,"Query requires full scan, GIN doesn't support it");
68+
69+
item = GETQUERY(query);
70+
71+
for(i=0; i<query->size; i++)
72+
if ( item[i].type == VAL )
73+
(*nentries)++;
74+
75+
entries = (Datum*)palloc( sizeof(Datum) * (*nentries) );
76+
77+
for(i=0; i<query->size; i++)
78+
if ( item[i].type == VAL ) {
79+
text *txt;
80+
81+
txt = (text*)palloc( VARHDRSZ + item[i].length );
82+
83+
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
84+
memcpy( VARDATA(txt), GETOPERAND( query ) + item[i].distance, item[i].length );
85+
86+
entries[j++] = PointerGetDatum( txt );
87+
88+
if ( strategy == 1 && item[i].weight != 0 )
89+
elog(ERROR,"With class of lexeme restrictions use @@@ operation");
90+
}
91+
92+
}
93+
94+
PG_FREE_IF_COPY(query, 0);
95+
PG_RETURN_POINTER(entries);
96+
}
97+
98+
typedef struct {
99+
ITEM *frst;
100+
bool *mapped_check;
101+
} GinChkVal;
102+
103+
static bool
104+
checkcondition_gin(void *checkval, ITEM * val) {
105+
GinChkVal *gcv = (GinChkVal*)checkval;
106+
107+
return gcv->mapped_check[ val - gcv->frst ];
108+
}
109+
110+
PG_FUNCTION_INFO_V1(gin_ts_consistent);
111+
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
112+
113+
Datum
114+
gin_ts_consistent(PG_FUNCTION_ARGS) {
115+
bool *check = (bool*)PG_GETARG_POINTER(0);
116+
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
117+
bool res = FALSE;
118+
119+
if ( query->size > 0 ) {
120+
int4 i, j=0;
121+
ITEM *item;
122+
GinChkVal gcv;
123+
124+
gcv.frst = item = GETQUERY(query);
125+
gcv.mapped_check= (bool*)palloc( sizeof(bool) * query->size );
126+
127+
for(i=0; i<query->size; i++)
128+
if ( item[i].type == VAL )
129+
gcv.mapped_check[ i ] = check[ j++ ];
130+
131+
132+
res = TS_execute(
133+
GETQUERY(query),
134+
&gcv,
135+
true,
136+
checkcondition_gin
137+
);
138+
139+
}
140+
141+
PG_FREE_IF_COPY(query, 2);
142+
PG_RETURN_BOOL(res);
143+
}
144+
145+

contrib/tsearch2/sql/tsearch2.sql

+11
Original file line numberDiff line numberDiff line change
@@ -363,3 +363,14 @@ select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
363363
insert into test_tsvector values (null, null);
364364
select a is null, a from test_tsvector order by a;
365365

366+
drop index wowidx;
367+
create index wowidx on test_tsvector using gin (a);
368+
set enable_seqscan=off;
369+
370+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
371+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
372+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
373+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
374+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
375+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
376+

contrib/tsearch2/tsearch.sql.in

+46
Original file line numberDiff line numberDiff line change
@@ -1146,8 +1146,54 @@ AS
11461146
FUNCTION 7 gtsq_same (gtsq, gtsq, internal),
11471147
STORAGE gtsq;
11481148

1149+
--GIN support functions

-- tsvector -> array of entries; the internal argument receives the entry count
CREATE FUNCTION gin_extract_tsvector(tsvector, internal)
	RETURNS internal
	AS 'MODULE_PATHNAME'
	LANGUAGE C RETURNS NULL ON NULL INPUT;

-- tsquery -> array of entries; the internal arguments are the entry count
-- output and the operator strategy number
CREATE FUNCTION gin_extract_tsquery(tsquery, internal, internal)
	RETURNS internal
	AS 'MODULE_PATHNAME'
	LANGUAGE C RETURNS NULL ON NULL INPUT;

-- evaluates a tsquery (third argument) against the per-entry match flags
-- passed as the first argument
CREATE FUNCTION gin_ts_consistent(internal, internal, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE C RETURNS NULL ON NULL INPUT;
1164+
1165+
-- @@@ : same procedures as declared here for both argument orders
-- (exectsq for tsvector @@@ tsquery, rexectsq for tsquery @@@ tsvector);
-- the two definitions are each other's commutator.
CREATE OPERATOR @@@ (
	LEFTARG    = tsvector,
	RIGHTARG   = tsquery,
	PROCEDURE  = exectsq,
	COMMUTATOR = '@@@',
	RESTRICT   = contsel,
	JOIN       = contjoinsel
);

CREATE OPERATOR @@@ (
	LEFTARG    = tsquery,
	RIGHTARG   = tsvector,
	PROCEDURE  = rexectsq,
	COMMUTATOR = '@@@',
	RESTRICT   = contsel,
	JOIN       = contjoinsel
);
1181+
1182+
-- Default GIN operator class for tsvector.  Index entries are stored as text
-- and compared with bttextcmp.
--   strategy 1 (@@):  gin_extract_tsquery raises an error for this strategy
--                     when a query lexeme carries a weight restriction
--   strategy 2 (@@@): accepts such queries; marked RECHECK
CREATE OPERATOR CLASS gin_tsvector_ops
	DEFAULT FOR TYPE tsvector USING gin
AS
	OPERATOR	1	@@ (tsvector, tsquery),
	OPERATOR	2	@@@ (tsvector, tsquery) RECHECK,
	FUNCTION	1	bttextcmp(text, text),
	FUNCTION	2	gin_extract_tsvector(tsvector,internal),
	FUNCTION	3	gin_extract_tsquery(tsquery,internal,internal),
	FUNCTION	4	gin_ts_consistent(internal,internal,tsquery),
	STORAGE		text;
1192+
1193+
11491194
--example of ISpell dictionary
11501195
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
11511196
--example of synonym dict
11521197
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
1198+
11531199
END;

src/backend/access/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
#
22
# Makefile for the access methods module
33
#
4-
# $PostgreSQL: pgsql/src/backend/access/Makefile,v 1.10 2005/11/07 17:36:44 tgl Exp $
4+
# $PostgreSQL: pgsql/src/backend/access/Makefile,v 1.11 2006/05/02 11:28:54 teodor Exp $
55
#
66

77
subdir = src/backend/access
88
top_builddir = ../../..
99
include $(top_builddir)/src/Makefile.global
1010

11-
SUBDIRS := common gist hash heap index nbtree transam
11+
SUBDIRS := common gist hash heap index nbtree transam gin
1212
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)
1313

1414
all: SUBSYS.o

src/backend/access/gin/Makefile

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#-------------------------------------------------------------------------
#
# Makefile--
#    Makefile for access/gin
#
# IDENTIFICATION
#    $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.1 2006/05/02 11:28:54 teodor Exp $
#
#-------------------------------------------------------------------------

subdir = src/backend/access/gin
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global

# Object files that are combined into this directory's SUBSYS.o
OBJS = ginutil.o gininsert.o ginxlog.o \
	ginentrypage.o gindatapage.o ginbtree.o \
	ginscan.o ginget.o ginvacuum.o \
	ginarrayproc.o ginbulk.o

all: SUBSYS.o

# Combine the objects into SUBSYS.o using the linker settings from
# Makefile.global
SUBSYS.o: $(OBJS)
	$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)

# Regenerate the dependency list for every C file in this directory
depend dep:
	$(CC) -MM $(CFLAGS) *.c >depend

clean:
	rm -f SUBSYS.o $(OBJS)

# Pull in the generated dependency list only if it exists
ifeq (depend,$(wildcard depend))
include depend
endif

0 commit comments

Comments
 (0)