Skip to content

Commit bf03cfd

Browse files
committed
Windows support in pg_import_system_collations
Windows can enumerate the locales that are either installed or supported by calling EnumSystemLocalesEx(), similar to what is already done in the READ_LOCALE_A_OUTPUT switch. We can refactor some of the logic already used in that switch into a new function create_collation_from_locale(). The enumerated locales have BCP 47 shape, that is with a hyphen between language and territory, instead of POSIX's underscore. The created collations will retain the BCP 47 shape, but we will also create a POSIX alias, so xx-YY will have an xx_YY alias. A new test collate.windows.win1252 is added that is like collate.linux.utf8. Author: Juan Jose Santamaria Flecha <juanjo.santamaria@gmail.com> Reviewed-by: Dmitry Koval <d.koval@postgrespro.ru> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Discussion: https://www.postgresql.org/message-id/flat/0050ec23-34d9-2765-9015-98c04f0e18ac@postgrespro.ru
1 parent 33ab0a2 commit bf03cfd

File tree

6 files changed

+1650
-52
lines changed

6 files changed

+1650
-52
lines changed

src/backend/commands/collationcmds.c

Lines changed: 187 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,12 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
499499
#define READ_LOCALE_A_OUTPUT
500500
#endif
501501

502+
/* will we use EnumSystemLocalesEx in pg_import_system_collations? */
503+
#ifdef WIN32
504+
#define ENUM_SYSTEM_LOCALE
505+
#endif
506+
507+
502508
#ifdef READ_LOCALE_A_OUTPUT
503509
/*
504510
* "Normalize" a libc locale name, stripping off encoding tags such as
@@ -610,6 +616,161 @@ get_icu_locale_comment(const char *localename)
610616
#endif /* USE_ICU */
611617

612618

619+
/*
620+
* Create a new collation using the input locale 'locale'. (subroutine for
621+
* pg_import_system_collations())
622+
*
623+
* 'nspid' is the namespace id where the collation will be created.
624+
*
625+
* 'nvalidp' is incremented if the locale has a valid encoding.
626+
*
627+
* 'ncreatedp' is incremented if the collation is actually created. If the
628+
* collation already exists it will quietly do nothing.
629+
*
630+
* The returned value is the encoding of the locale, -1 if the locale is not
631+
* valid for creating a collation.
632+
*
633+
*/
634+
pg_attribute_unused()
635+
static int
636+
create_collation_from_locale(const char *locale, int nspid,
637+
int *nvalidp, int *ncreatedp)
638+
{
639+
int enc;
640+
Oid collid;
641+
642+
/*
643+
* Some systems have locale names that don't consist entirely of
644+
* ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
645+
* This is pretty silly, since we need the locale itself to
646+
* interpret the non-ASCII characters. We can't do much with
647+
* those, so we filter them out.
648+
*/
649+
if (!pg_is_ascii(locale))
650+
{
651+
elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
652+
return -1;
653+
}
654+
655+
enc = pg_get_encoding_from_locale(locale, false);
656+
if (enc < 0)
657+
{
658+
elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
659+
return -1;
660+
}
661+
if (!PG_VALID_BE_ENCODING(enc))
662+
{
663+
elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
664+
return -1;
665+
}
666+
if (enc == PG_SQL_ASCII)
667+
return -1; /* C/POSIX are already in the catalog */
668+
669+
/* count valid locales found in operating system */
670+
(*nvalidp)++;
671+
672+
/*
673+
* Create a collation named the same as the locale, but quietly
674+
* doing nothing if it already exists. This is the behavior we
675+
* need even at initdb time, because some versions of "locale -a"
676+
* can report the same locale name more than once. And it's
677+
* convenient for later import runs, too, since you just about
678+
* always want to add on new locales without a lot of chatter
679+
* about existing ones.
680+
*/
681+
collid = CollationCreate(locale, nspid, GetUserId(),
682+
COLLPROVIDER_LIBC, true, enc,
683+
locale, locale, NULL,
684+
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
685+
true, true);
686+
if (OidIsValid(collid))
687+
{
688+
(*ncreatedp)++;
689+
690+
/* Must do CCI between inserts to handle duplicates correctly */
691+
CommandCounterIncrement();
692+
}
693+
694+
return enc;
695+
}
696+
697+
698+
#ifdef ENUM_SYSTEM_LOCALE
/*
 * Parameter block handed to the callback function win32_read_locale()
 * through the LPARAM argument of EnumSystemLocalesEx().
 */
typedef struct
{
	Oid			nspid;			/* namespace in which collations are created */
	int		   *ncreatedp;		/* incremented for each collation created */
	int		   *nvalidp;		/* incremented for each usable locale seen */
} CollParam;

/*
 * Callback function for EnumSystemLocalesEx() in
 * pg_import_system_collations().  Creates a collation for every valid locale
 * and a POSIX alias collation.
 *
 * 'pStr' is the wide-character locale name supplied by the enumeration,
 * 'dwFlags' is ignored, and 'lparam' carries a pointer to a CollParam.
 *
 * The callback contract is to return TRUE to continue enumerating and FALSE
 * to stop enumerating.  We always want to continue, so every path, including
 * the ones that skip a locale, returns TRUE.
 */
static BOOL CALLBACK
win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
{
	CollParam  *param = (CollParam *) lparam;
	char		localebuf[NAMEDATALEN] = {0};
	int			result;
	int			enc;

	(void) dwFlags;

	result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
								 NULL, NULL);

	if (result == 0)
	{
		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
		{
			/*
			 * The conversion failed, so we cannot rely on the output buffer
			 * being NUL-terminated.  Force a terminator before handing the
			 * (truncated) name to elog(), so that "%s" cannot read past the
			 * end of localebuf.
			 */
			localebuf[NAMEDATALEN - 1] = '\0';
			elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
		}
		/* Any other conversion failure is skipped without a message */
		return TRUE;
	}
	if (localebuf[0] == '\0')
		return TRUE;

	enc = create_collation_from_locale(localebuf, param->nspid,
									   param->nvalidp, param->ncreatedp);
	if (enc < 0)
		return TRUE;

	/*
	 * Windows will use hyphens between language and territory, where POSIX
	 * uses an underscore.  Simply create a POSIX alias.
	 */
	if (strchr(localebuf, '-'))
	{
		char		alias[NAMEDATALEN];
		Oid			collid;
		size_t		i;

		/*
		 * Copy the name while turning each hyphen into an underscore.
		 * localebuf holds a terminated string at this point (the conversion
		 * above succeeded with the terminator included), and alias has the
		 * same size, so the copy cannot overrun.
		 */
		for (i = 0; localebuf[i] != '\0'; i++)
			alias[i] = (localebuf[i] == '-') ? '_' : localebuf[i];
		alias[i] = '\0';

		/* The alias keeps the original Windows name as its locale */
		collid = CollationCreate(alias, param->nspid, GetUserId(),
								 COLLPROVIDER_LIBC, true, enc,
								 localebuf, localebuf, NULL,
								 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
								 true, true);
		if (OidIsValid(collid))
		{
			(*param->ncreatedp)++;

			/* Must do CCI between inserts to handle duplicates correctly */
			CommandCounterIncrement();
		}
	}

	return TRUE;
}
#endif							/* ENUM_SYSTEM_LOCALE */
772+
773+
613774
/*
614775
* pg_import_system_collations: add known system collations to pg_collation
615776
*/
@@ -668,58 +829,9 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
668829
}
669830
localebuf[len - 1] = '\0';
670831

671-
/*
672-
* Some systems have locale names that don't consist entirely of
673-
* ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
674-
* This is pretty silly, since we need the locale itself to
675-
* interpret the non-ASCII characters. We can't do much with
676-
* those, so we filter them out.
677-
*/
678-
if (!pg_is_ascii(localebuf))
679-
{
680-
elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf);
681-
continue;
682-
}
683-
684-
enc = pg_get_encoding_from_locale(localebuf, false);
832+
enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
685833
if (enc < 0)
686-
{
687-
elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"",
688-
localebuf);
689-
continue;
690-
}
691-
if (!PG_VALID_BE_ENCODING(enc))
692-
{
693-
elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf);
694834
continue;
695-
}
696-
if (enc == PG_SQL_ASCII)
697-
continue; /* C/POSIX are already in the catalog */
698-
699-
/* count valid locales found in operating system */
700-
nvalid++;
701-
702-
/*
703-
* Create a collation named the same as the locale, but quietly
704-
* doing nothing if it already exists. This is the behavior we
705-
* need even at initdb time, because some versions of "locale -a"
706-
* can report the same locale name more than once. And it's
707-
* convenient for later import runs, too, since you just about
708-
* always want to add on new locales without a lot of chatter
709-
* about existing ones.
710-
*/
711-
collid = CollationCreate(localebuf, nspid, GetUserId(),
712-
COLLPROVIDER_LIBC, true, enc,
713-
localebuf, localebuf, NULL,
714-
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
715-
true, true);
716-
if (OidIsValid(collid))
717-
{
718-
ncreated++;
719-
720-
/* Must do CCI between inserts to handle duplicates correctly */
721-
CommandCounterIncrement();
722-
}
723835

724836
/*
725837
* Generate aliases such as "en_US" in addition to "en_US.utf8"
@@ -857,5 +969,30 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
857969
}
858970
#endif /* USE_ICU */
859971

972+
/* Load collations known to WIN32 */
973+
#ifdef ENUM_SYSTEM_LOCALE
974+
{
975+
int nvalid = 0;
976+
CollParam param;
977+
978+
param.nspid = nspid;
979+
param.ncreatedp = &ncreated;
980+
param.nvalidp = &nvalid;
981+
982+
/*
983+
* Enumerate the locales that are either installed on or supported
984+
* by the OS.
985+
*/
986+
if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
987+
(LPARAM) &param, NULL))
988+
_dosmaperr(GetLastError());
989+
990+
/* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
991+
if (nvalid == 0)
992+
ereport(WARNING,
993+
(errmsg("no usable system locales were found")));
994+
}
995+
#endif /* ENUM_SYSTEM_LOCALE */
996+
860997
PG_RETURN_INT32(ncreated);
861998
}

0 commit comments

Comments
 (0)