postgrespro
diff --git a/‎contrib/shared_ispell/LICENSE
Lines changed: 25 additions & 0 deletions b/‎contrib/shared_ispell/LICENSE
Lines changed: 25 additions & 0 deletions
diff --git a/‎contrib/shared_ispell/META.json
Lines changed: 34 additions & 0 deletions b/‎contrib/shared_ispell/META.json
Lines changed: 34 additions & 0 deletions
diff --git a/‎contrib/shared_ispell/Makefile
Lines changed: 20 additions & 0 deletions b/‎contrib/shared_ispell/Makefile
Lines changed: 20 additions & 0 deletions
diff --git a/‎contrib/shared_ispell/README.md
Lines changed: 138 additions & 0 deletions b/‎contrib/shared_ispell/README.md
Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,25 @@
+Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are
+permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, this list of
+      conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, this list
+      of conditions and the following disclaimer in the documentation and/or other materials
+      provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY TOMAS VONDRA ''AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TOMAS VONDRA OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The views and conclusions contained in the software and documentation are those of the
+authors and should not be interpreted as representing official policies, either expressed
+or implied, of Tomas Vondra.
@@ -0,0 +1,34 @@
+{
+   "name": "shared_ispell",
+   "abstract": "Provides a shared ispell dictionary - initialized once and stored in shared segment.",
+   "description": "Allows you to allocate area within a shared segment and use it for ispell dictionaries.",
+   "version": "1.0.0",
+   "maintainer": "Tomas Vondra <tv@fuzzy.cz>",
+   "license": "bsd",
+   "prereqs": {
+      "runtime": {
+         "requires": {
+            "PostgreSQL": "8.4.0"
+         }
+      }
+   },
+   "provides": {
+     "shared_ispell": {
+       "file": "shared_ispell--1.0.0.sql",
+       "version": "1.0.0"
+     }
+   },
+   "resources": {
+      "repository": {
+        "url":  "https://github.com/tvondra/shared_ispell.git",
+        "web":  "http://github.com/tvondra/shared_ispell",
+        "type": "git"
+      }
+   },
+   "tags" : ["ispell", "shared", "fulltext", "dictionary"],
+   "meta-spec": {
+      "version": "1.0.0",
+      "url": "http://pgxn.org/meta/spec.txt"
+   },
+   "release_status" : "testing"
+}
@@ -0,0 +1,20 @@
+# contrib/shared_ispell/Makefile
+
+MODULE_big = shared_ispell
+OBJS = src/shared_ispell.o
+
+EXTENSION = shared_ispell
+DATA = sql/shared_ispell--1.1.0.sql
+
+REGRESS = shared_ispell
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/shared_ispell
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
@@ -0,0 +1,138 @@
+Shared ISpell Dictionary
+========================
+This PostgreSQL extension provides a shared ispell dictionary, i.e.
+a dictionary that's stored in a shared segment. The traditional ispell
+implementation means that each session initializes and stores the
+dictionary on its own, which means a lot of CPU/RAM is wasted.
+
+This extension allocates an area in shared segment (you have to
+choose the size in advance) and then loads the dictionary into it
+when it's used for the first time.
+
+If you need just snowball-type dictionaries, this extension is not
+really interesting for you. But if you really need an ispell
+dictionary, this may save you a lot of resources.
+
+
+Install
+-------
+Installing the extension is quite simple, especially if you're on 9.1.
+In that case all you need to do is this:
+
+    $ make install
+
+and then (after connecting to the database)
+
+    db=# CREATE EXTENSION shared_ispell;
+
+If you're on a pre-9.1 version, you'll have to do the second part manually
+by running the SQL script (shared_ispell--x.y.sql) in the database. If
+needed, replace MODULE_PATHNAME by $libdir.
+
+
+Config
+------
+Now the functions are created, but you still need to load the shared
+module. This needs to be done from postgresql.conf, as the module
+needs to allocate space in the shared memory segment. So add this to
+the config file (or update the current values)
+
+    # libraries to load
+    shared_preload_libraries = 'shared_ispell'
+
+    # known GUC prefixes (needed on 9.1 and older only; removed in 9.2)
+    custom_variable_classes = 'shared_ispell'
+
+    # config of the shared memory
+    shared_ispell.max_size = 32MB
+
+Yes, there's a single GUC variable that defines the maximum size of
+the shared segment. This is a hard limit, the shared segment is not
+extensible and you need to set it so that all the dictionaries fit
+into it and not much memory is wasted.
+
+To find out how much memory you actually need, use a large value
+(e.g. 200MB) and load all the dictionaries you want to use. Then use
+the shared_ispell_mem_used() function to find out how much memory
+was actually used (and set the max_size GUC variable accordingly).
+
+Don't set it exactly to that value, leave some free space,
+so that you can reload the dictionaries without changing the GUC
+max_size limit (which requires a restart of the DB). Something
+like 512kB of headroom should be just fine.
+
+The shared segment can contain several dictionaries at the same time,
+the amount of memory is the only limit. There's no limit on number
+of dictionaries / words etc. Just the max_size GUC variable.
+
+
+Using the dictionary
+--------------------
+Technically, the extension defines a 'shared_ispell' template that
+you may use to define custom dictionaries. E.g. you may do this
+
+    CREATE TEXT SEARCH DICTIONARY czech_shared (
+        TEMPLATE = shared_ispell,
+        DictFile = czech,
+        AffFile = czech,
+        StopWords = czech
+    );
+
+    CREATE TEXT SEARCH CONFIGURATION public.czech_shared
+        ( COPY = pg_catalog.simple );
+
+    ALTER TEXT SEARCH CONFIGURATION czech_shared
+        ALTER MAPPING FOR asciiword, asciihword, hword_asciipart,
+                        word, hword, hword_part
+        WITH czech_shared;
+
+and then do the usual stuff, e.g.
+
+    db=# SELECT ts_lexize('czech_shared', 'automobile');
+
+or whatever you want.
+
+
+Available functions
+-------------------
+The extension provides five management functions that allow you to
+manage and get info about the preloaded dictionaries. The first two
+functions
+
+    shared_ispell_mem_used()
+    shared_ispell_mem_available()
+
+allow you to get info about the shared segment (used and free memory)
+e.g. to properly size the segment (max_size). Then there are functions
+that return the list of dictionaries / stop lists loaded in the shared segment
+
+    shared_ispell_dicts()
+    shared_ispell_stoplists()
+
+e.g. like this
+
+    db=# SELECT * FROM shared_ispell_dicts();
+
+     dict_name | affix_name | words | affixes |  bytes   
+    -----------+------------+-------+---------+----------
+     bulgarian | bulgarian  | 79267 |      12 |  7622128
+     czech     | czech      | 96351 |    2544 | 12715000
+    (2 rows)
+
+
+    db=# SELECT * FROM shared_ispell_stoplists();
+
+     stop_name | words | bytes 
+    -----------+-------+-------
+     czech     |   259 |  4552
+    (1 row)
+
+The last function allows you to reset the dictionary (e.g. so that you
+can reload the updated files from disk). The sessions that already use
+the dictionaries will be forced to reinitialize them (the first one
+will rebuild and copy them into the shared segment, the other ones will
+use this prepared data).
+
+    db=# SELECT shared_ispell_reset();
+
+That's all for now ...