postgrespro
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
Lines changed: 22 additions & 11 deletions
diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
Lines changed: 2 additions & 1 deletion
diff --git a/src/backend/regex/Makefile b/src/backend/regex/Makefile
Lines changed: 2 additions & 1 deletion
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
Lines changed: 0 additions & 165 deletions
@@ -221,17 +221,21 @@ initdb --locale=sv_SE
 
      <listitem>
       <para>
-       The ability to use indexes with <literal>LIKE</> clauses
-       <indexterm><primary>LIKE</><secondary>and locales</></indexterm>
+       The <function>upper</>, <function>lower</>, and <function>initcap</>
+       functions
+       <indexterm><primary>upper</><secondary>and locales</></indexterm>
+       <indexterm><primary>lower</><secondary>and locales</></indexterm>
       </para>
      </listitem>
 
      <listitem>
       <para>
-       The <function>upper</>,  <function>lower</>,  and <function>initcap</>
-       functions
-       <indexterm><primary>upper</><secondary>and locales</></indexterm>
-       <indexterm><primary>lower</><secondary>and locales</></indexterm>
+       Pattern matching operators (<literal>LIKE</>, <literal>SIMILAR TO</>,
+       and POSIX-style regular expressions); locales affect both
+       case-insensitive matching and the classification of characters by
+       character-class regular expressions
+       <indexterm><primary>LIKE</><secondary>and locales</></indexterm>
+       <indexterm><primary>regular expressions</><secondary>and locales</></indexterm>
       </para>
      </listitem>
 
@@ -241,6 +245,12 @@ initdb --locale=sv_SE
        <indexterm><primary>to_char</><secondary>and locales</></indexterm>
       </para>
      </listitem>
+
+     <listitem>
+      <para>
+       The ability to use indexes with <literal>LIKE</> clauses
+      </para>
+     </listitem>
     </itemizedlist>
    </para>
 
@@ -319,8 +329,8 @@ initdb --locale=sv_SE
   <indexterm zone="collation"><primary>collation</></>
 
   <para>
-   The collation feature allows specifying the sort order and certain
-   other locale aspects of data per-column, or even per-operation.
+   The collation feature allows specifying the sort order and character
+   classification behavior of data per-column, or even per-operation.
    This alleviates the restriction that the
    <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol> settings
    of a database cannot be changed after its creation.
@@ -351,8 +361,8 @@ initdb --locale=sv_SE
    </para>
 
    <para>
-    When the database system has to perform an ordering or a
-    comparison, it uses the collation of the input expression.  This
+    When the database system has to perform an ordering or a character
+    classification, it uses the collation of the input expression.  This
     happens, for example, with <literal>ORDER BY</literal> clauses
     and function or operator calls such as <literal>&lt;</literal>.
     The collation to apply for an <literal>ORDER BY</literal> clause
@@ -361,7 +371,8 @@ initdb --locale=sv_SE
     below.  In addition to comparison operators, collations are taken into
     account by functions that convert between lower and upper case
     letters, such as <function>lower</>, <function>upper</>, and
-    <function>initcap</>.
+    <function>initcap</>; by pattern matching operators; and by
+    <function>to_char</> and related functions.
    </para>
 
    <para>
 
@@ -25,6 +25,7 @@
 #include <arpa/inet.h>
 #include <unistd.h>
 
+#include "catalog/pg_collation.h"
 #include "libpq/ip.h"
 #include "libpq/libpq.h"
 #include "regex/regex.h"
@@ -1781,7 +1782,7 @@ parse_ident_usermap(List *line, int line_number, const char *usermap_name,
 		 * XXX: Major room for optimization: regexps could be compiled when
 		 * the file is loaded and then re-used in every connection.
 		 */
-		r = pg_regcomp(&re, wstr, wlen, REG_ADVANCED);
+		r = pg_regcomp(&re, wstr, wlen, REG_ADVANCED, C_COLLATION_OID);
 		if (r)
 		{
 			char		errstr[100];
 
@@ -17,6 +17,7 @@ OBJS = regcomp.o regerror.o regexec.o regfree.o
 include $(top_srcdir)/src/backend/common.mk
 
 # mark inclusion dependencies between .c files explicitly
-regcomp.o: regcomp.c regc_lex.c regc_color.c regc_nfa.c regc_cvec.c regc_locale.c
+regcomp.o: regcomp.c regc_lex.c regc_color.c regc_nfa.c regc_cvec.c \
+        regc_locale.c regc_pg_locale.c
 
 regexec.o: regexec.c rege_dfa.c
@@ -350,171 +350,6 @@ static const struct cname
 };
 
 
-/*
- * ctype functions adapted to work on pg_wchar (a/k/a chr)
- *
- * When working in UTF8 encoding, we use the <wctype.h> functions if
- * available.  This assumes that every platform uses Unicode codepoints
- * directly as the wchar_t representation of Unicode.  On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that.  This is only 100%
- * correct in single-byte encodings such as LATINn.  However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't relevant for higher code values anyway.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-static int
-pg_wc_isdigit(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswdigit((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isdigit((unsigned char) c));
-}
-
-static int
-pg_wc_isalpha(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswalpha((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isalpha((unsigned char) c));
-}
-
-static int
-pg_wc_isalnum(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswalnum((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isalnum((unsigned char) c));
-}
-
-static int
-pg_wc_isupper(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswupper((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isupper((unsigned char) c));
-}
-
-static int
-pg_wc_islower(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswlower((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && islower((unsigned char) c));
-}
-
-static int
-pg_wc_isgraph(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswgraph((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isgraph((unsigned char) c));
-}
-
-static int
-pg_wc_isprint(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswprint((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isprint((unsigned char) c));
-}
-
-static int
-pg_wc_ispunct(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswpunct((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && ispunct((unsigned char) c));
-}
-
-static int
-pg_wc_isspace(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswspace((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isspace((unsigned char) c));
-}
-
-static pg_wchar
-pg_wc_toupper(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return towupper((wint_t) c);
-	}
-#endif
-	if (c <= (pg_wchar) UCHAR_MAX)
-		return toupper((unsigned char) c);
-	return c;
-}
-
-static pg_wchar
-pg_wc_tolower(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return towlower((wint_t) c);
-	}
-#endif
-	if (c <= (pg_wchar) UCHAR_MAX)
-		return tolower((unsigned char) c);
-	return c;
-}
-
-
 /*
  * element - map collating-element name to celt
  */