Skip to content

Commit 21c9857

Browse files
committed
Merge branch 'PGPRO_shared_ispell' into PGPRO9_5
Merge shared_ispell contrib module
2 parents 0cbe1ca + f21a0de commit 21c9857

File tree

15 files changed

+1797
-0
lines changed

15 files changed

+1797
-0
lines changed

contrib/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ SUBDIRS = \
4444
pgstattuple \
4545
postgres_fdw \
4646
seg \
47+
shared_ispell \
4748
spi \
4849
sr_plan \
4950
tablefunc \

contrib/shared_ispell/LICENSE

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved.
2+
3+
Redistribution and use in source and binary forms, with or without modification, are
4+
permitted provided that the following conditions are met:
5+
6+
1. Redistributions of source code must retain the above copyright notice, this list of
7+
conditions and the following disclaimer.
8+
9+
2. Redistributions in binary form must reproduce the above copyright notice, this list
10+
of conditions and the following disclaimer in the documentation and/or other materials
11+
provided with the distribution.
12+
13+
THIS SOFTWARE IS PROVIDED BY TOMAS VONDRA ''AS IS'' AND ANY EXPRESS OR IMPLIED
14+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
15+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TOMAS VONDRA OR
16+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
18+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22+
23+
The views and conclusions contained in the software and documentation are those of the
24+
authors and should not be interpreted as representing official policies, either expressed
25+
or implied, of Tomas Vondra.

contrib/shared_ispell/META.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"name": "shared_ispell",
3+
"abstract": "Provides a shared ispell dictionary - initialized once and stored in shared segment.",
4+
"description": "Allows you to allocate area within a shared segment and use it for ispell dictionaries.",
5+
"version": "1.0.0",
6+
"maintainer": "Tomas Vondra <tv@fuzzy.cz>",
7+
"license": "bsd",
8+
"prereqs": {
9+
"runtime": {
10+
"requires": {
11+
"PostgreSQL": "8.4.0"
12+
}
13+
}
14+
},
15+
"provides": {
16+
"shared_ispell": {
17+
"file": "shared_ispell--1.0.0.sql",
18+
"version": "1.0.0"
19+
}
20+
},
21+
"resources": {
22+
"repository": {
23+
"url": "https://github.com/tvondra/shared_ispell.git",
24+
"web": "http://github.com/tvondra/shared_ispell",
25+
"type": "git"
26+
}
27+
},
28+
"tags" : ["ispell", "shared", "fulltext", "dictionary"],
29+
"meta-spec": {
30+
"version": "1.0.0",
31+
"url": "http://pgxn.org/meta/spec.txt"
32+
},
33+
"release_status" : "testing"
34+
}

contrib/shared_ispell/Makefile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# contrib/shared_ispell/Makefile
2+
3+
MODULE_big = shared_ispell
4+
OBJS = src/shared_ispell.o
5+
6+
EXTENSION = shared_ispell
7+
DATA = shared_ispell--1.1.0.sql
8+
9+
REGRESS = shared_ispell
10+
11+
EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/postgresql.conf
12+
13+
ifdef USE_PGXS
14+
PG_CONFIG = pg_config
15+
PGXS := $(shell $(PG_CONFIG) --pgxs)
16+
include $(PGXS)
17+
else
18+
subdir = contrib/shared_ispell
19+
top_builddir = ../..
20+
include $(top_builddir)/src/Makefile.global
21+
include $(top_srcdir)/contrib/contrib-global.mk
22+
endif
23+
24+
installcheck:
25+
@echo "installcheck is disabled"

contrib/shared_ispell/README.md

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
Shared ISpell Dictionary
2+
========================
3+
This PostgreSQL extension provides a shared ispell dictionary, i.e.
4+
a dictionary that's stored in shared segment. The traditional ispell
5+
implementation means that each session initializes and stores the
6+
dictionary on its own, which means a lot of CPU/RAM is wasted.
7+
8+
This extension allocates an area in shared segment (you have to
9+
choose the size in advance) and then loads the dictionary into it
10+
when it's used for the first time.
11+
12+
If you need just snowball-type dictionaries, this extension is not
13+
really interesting for you. But if you really need an ispell
14+
dictionary, this may save you a lot of resources.
15+
16+
17+
Install
18+
-------
19+
Installing the extension is quite simple, especially if you're on 9.1 or later.
20+
In that case all you need to do is this:
21+
22+
$ make install
23+
24+
and then (after connecting to the database)
25+
26+
db=# CREATE EXTENSION shared_ispell;
27+
28+
If you're on a pre-9.1 version, you'll have to do the second part manually
29+
by running the SQL script (shared_ispell--x.y.sql) in the database. If
30+
needed, replace MODULE_PATHNAME by $libdir.
31+
32+
33+
Config
34+
------
35+
Now the functions are created, but you still need to load the shared
36+
module. This needs to be done from postgresql.conf, as the module
37+
needs to allocate space in the shared memory segment. So add this to
38+
the config file (or update the current values)
39+
40+
# libraries to load
41+
shared_preload_libraries = 'shared_ispell'
42+
43+
# config of the shared memory
44+
shared_ispell.max_size = 32MB
45+
46+
Yes, there's a single GUC variable that defines the maximum size of
47+
the shared segment. This is a hard limit, the shared segment is not
48+
extensible and you need to set it so that all the dictionaries fit
49+
into it and not much memory is wasted.
50+
51+
To find out how much memory you actually need, use a large value
52+
(e.g. 200MB) and load all the dictionaries you want to use. Then use
53+
the shared_ispell_mem_used() function to find out how much memory
54+
was actually used (and set the max_size GUC variable accordingly).
55+
56+
Don't set it exactly to that value; leave some free space,
57+
so that you can reload the dictionaries without changing the GUC
58+
max_size limit (which requires a restart of the DB). Something
59+
like 512kB should be just fine.
60+
61+
The shared segment can contain several dictionaries at the same time,
62+
the amount of memory is the only limit. There's no limit on number
63+
of dictionaries / words etc. Just the max_size GUC variable.
64+
65+
66+
Using the dictionary
67+
--------------------
68+
Technically, the extension defines a 'shared_ispell' template that
69+
you may use to define custom dictionaries. E.g. you may do this
70+
71+
CREATE TEXT SEARCH DICTIONARY czech_shared (
72+
TEMPLATE = shared_ispell,
73+
DictFile = czech,
74+
AffFile = czech,
75+
StopWords = czech
76+
);
77+
78+
CREATE TEXT SEARCH CONFIGURATION public.czech_shared
79+
( COPY = pg_catalog.simple );
80+
81+
ALTER TEXT SEARCH CONFIGURATION czech_shared
82+
ALTER MAPPING FOR asciiword, asciihword, hword_asciipart,
83+
word, hword, hword_part
84+
WITH czech_shared;
85+
86+
and then do the usual stuff, e.g.
87+
88+
db=# SELECT ts_lexize('czech_shared', 'automobile');
89+
90+
or whatever you want.
91+
92+
93+
Available functions
94+
-------------------
95+
The extension provides five management functions, that allow you to
96+
manage and get info about the preloaded dictionaries. The first two
97+
functions
98+
99+
shared_ispell_mem_used()
100+
shared_ispell_mem_available()
101+
102+
allow you to get info about the shared segment (used and free memory)
103+
e.g. to properly size the segment (max_size). Then there are functions
104+
that return the list of dictionaries / stop lists loaded in the shared segment
105+
106+
shared_ispell_dicts()
107+
shared_ispell_stoplists()
108+
109+
e.g. like this
110+
111+
db=# SELECT * FROM shared_ispell_dicts();
112+
113+
dict_name | affix_name | words | affixes | bytes
114+
-----------+------------+-------+---------+----------
115+
bulgarian | bulgarian | 79267 | 12 | 7622128
116+
czech | czech | 96351 | 2544 | 12715000
117+
(2 rows)
118+
119+
120+
db=# SELECT * FROM shared_ispell_stoplists();
121+
122+
stop_name | words | bytes
123+
-----------+-------+-------
124+
czech | 259 | 4552
125+
(1 row)
126+
127+
The last function allows you to reset the dictionary (e.g. so that you
128+
can reload the updated files from disk). The sessions that already use
129+
the dictionaries will be forced to reinitialize them (the first one
130+
will rebuild and copy them in the shared segment, the other ones will
131+
use this prepared data).
132+
133+
db=# SELECT shared_ispell_reset();
134+
135+
That's all for now ...

0 commit comments

Comments
 (0)