Skip to content

Commit f71c924

Browse files
committed
[ Patch comments in three pieces.] Attached is a patch against 7.2 which adds locale awareness to the character classes of the regular expression engine. ... > > I still think the xdigit class could be handled the same way the digit > > class is (by enumeration rather than using the isxdigit function). That > > saves you a cycle, and I don't think there's any loss. > > In fact, I will email you when I apply the original patch. I missed that case :-(. Here is the patched patch. ... Here is a patch which addresses Tatsuo's concerns (it does return a static struct instead of constructing it).
1 parent 450e728 commit f71c924

File tree

1 file changed

+92
-2
lines changed

1 file changed

+92
-2
lines changed

src/backend/regex/regcomp.c

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,17 @@
4747
#include "regex/regex.h"
4848
#include "regex/utils.h"
4949
#include "regex/regex2.h"
50-
#include "regex/cclass.h"
5150
#include "regex/cname.h"
51+
#include <locale.h>
52+
53+
/*
 * One named character class usable in a bracket expression.
 *
 * Tables of these are terminated by an entry whose name is NULL.
 */
struct cclass
{
	char	   *name;		/* class name, e.g. "alnum" */
	char	   *chars;		/* NUL-terminated list of the member characters */
	char	   *multis;		/* NOTE(review): presumably multi-character
							 * members; every table in this file sets it
							 * to "" -- confirm against the consumers */
};

/*
 * Active class table.  Stays NULL until pg95_regcomp() notices that and
 * fills it in with the result of cclass_init().
 */
static struct cclass *cclasses = NULL;

static struct cclass *cclass_init(void);
5261

5362
/*
5463
* parse structure, passed up and down to avoid global variables and
@@ -174,6 +183,9 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags)
174183
pg_wchar *wcp;
175184
#endif
176185

186+
if ( cclasses == NULL )
187+
cclasses = cclass_init();
188+
177189
#ifdef REDEBUG
178190
#define GOODFLAGS(f) (f)
179191
#else
@@ -884,7 +896,7 @@ p_b_cclass(struct parse * p, cset *cs)
884896
struct cclass *cp;
885897
size_t len;
886898
char *u;
887-
char c;
899+
unsigned char c;
888900

889901
while (MORE() && pg_isalpha(PEEK()))
890902
NEXT();
@@ -1716,3 +1728,81 @@ pg_islower(int c)
17161728
return (islower((unsigned char) c));
17171729
#endif
17181730
}
1731+
1732+
static struct cclass *
1733+
cclass_init(void)
1734+
{
1735+
static struct cclass cclasses_C[] = {
1736+
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "" },
1737+
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "" },
1738+
{ "blank", " \t", "" },
1739+
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", "" },
1740+
{ "digit", "0123456789", "" },
1741+
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
1742+
{ "lower", "abcdefghijklmnopqrstuvwxyz", "" },
1743+
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "" },
1744+
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
1745+
{ "space", "\t\n\v\f\r ", "" },
1746+
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "" },
1747+
{ "xdigit", "0123456789ABCDEFabcdef", "" },
1748+
{ NULL, NULL, "" }
1749+
};
1750+
struct cclass *cp = NULL;
1751+
struct cclass *classes = NULL;
1752+
struct cclass_factory
1753+
{
1754+
char *name;
1755+
int (*func)(int);
1756+
char *chars;
1757+
} cclass_factories [] =
1758+
{
1759+
{ "alnum", isalnum, NULL },
1760+
{ "alpha", isalpha, NULL },
1761+
{ "blank", NULL, " \t" },
1762+
{ "cntrl", iscntrl, NULL },
1763+
{ "digit", NULL, "0123456789" },
1764+
{ "graph", isgraph, NULL },
1765+
{ "lower", islower, NULL },
1766+
{ "print", isprint, NULL },
1767+
{ "punct", ispunct, NULL },
1768+
{ "space", NULL, "\t\n\v\f\r " },
1769+
{ "upper", isupper, NULL },
1770+
{ "xdigit", NULL, "0123456789ABCDEFabcdef" },
1771+
{ NULL, NULL, NULL }
1772+
};
1773+
struct cclass_factory *cf = NULL;
1774+
1775+
if ( strcmp( setlocale( LC_CTYPE, NULL ), "C" ) == 0 )
1776+
return cclasses_C;
1777+
1778+
classes = malloc(sizeof(struct cclass) * (sizeof(cclass_factories) / sizeof(struct cclass_factory)));
1779+
if (classes == NULL)
1780+
elog(ERROR,"cclass_init: out of memory");
1781+
1782+
cp = classes;
1783+
for(cf = cclass_factories; cf->name != NULL; cf++)
1784+
{
1785+
cp->name = strdup(cf->name);
1786+
if ( cf->chars )
1787+
cp->chars = strdup(cf->chars);
1788+
else
1789+
{
1790+
int x = 0, y = 0;
1791+
cp->chars = malloc(sizeof(char) * 256);
1792+
if (cp->chars == NULL)
1793+
elog(ERROR,"cclass_init: out of memory");
1794+
for (x = 0; x < 256; x++)
1795+
{
1796+
if((cf->func)(x))
1797+
*(cp->chars + y++) = x;
1798+
}
1799+
*(cp->chars + y) = '\0';
1800+
}
1801+
cp->multis = "";
1802+
cp++;
1803+
}
1804+
cp->name = cp->chars = NULL;
1805+
cp->multis = "";
1806+
1807+
return classes;
1808+
}

0 commit comments

Comments
 (0)