PostgreSQL Source Code git master
pg_locale.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12/*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are
22 * permanently set to "C", and then we use temporary locale_t
23 * objects when we need to look up locale data based on the GUCs
24 * of the same name. Information is cached when the GUCs change.
25 * The cached information is only used by the formatting functions
26 * (to_char, etc.) and the money type. For the user, this should all be
27 * transparent.
28 *----------
29 */
30
31
32#include "postgres.h"
33
34#include <time.h>
35
36#include "access/htup_details.h"
38#include "catalog/pg_database.h"
39#include "common/hashfn.h"
40#include "common/string.h"
41#include "mb/pg_wchar.h"
42#include "miscadmin.h"
43#include "utils/builtins.h"
44#include "utils/formatting.h"
45#include "utils/guc_hooks.h"
46#include "utils/lsyscache.h"
47#include "utils/memutils.h"
48#include "utils/pg_locale.h"
49#include "utils/relcache.h"
50#include "utils/syscache.h"
51
52#ifdef WIN32
53#include <shlwapi.h>
54#endif
55
56/* Error triggered for locale-sensitive subroutines */
57#define PGLOCALE_SUPPORT_ERROR(provider) \
58 elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)
59
60/*
61 * This should be large enough that most strings will fit, but small enough
62 * that we feel comfortable putting it on the stack
63 */
64#define TEXTBUFLEN 1024
65
66#define MAX_L10N_DATA 80
67
68/* pg_locale_builtin.c */
70extern char *get_collation_actual_version_builtin(const char *collcollate);
71
72/* pg_locale_icu.c */
73#ifdef USE_ICU
74extern UCollator *pg_ucol_open(const char *loc_str);
75extern char *get_collation_actual_version_icu(const char *collcollate);
76#endif
78
79/* pg_locale_libc.c */
81extern char *get_collation_actual_version_libc(const char *collcollate);
82
83extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
84 ssize_t srclen, pg_locale_t locale);
85extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
86 ssize_t srclen, pg_locale_t locale);
87extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
88 ssize_t srclen, pg_locale_t locale);
89extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
90 ssize_t srclen, pg_locale_t locale);
91
92extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
93 ssize_t srclen, pg_locale_t locale);
94extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
95 ssize_t srclen, pg_locale_t locale);
96extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
97 ssize_t srclen, pg_locale_t locale);
98extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
99 ssize_t srclen, pg_locale_t locale);
100
101extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
102 ssize_t srclen, pg_locale_t locale);
103extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
104 ssize_t srclen, pg_locale_t locale);
105extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
106 ssize_t srclen, pg_locale_t locale);
107
108/* GUC settings */
113
115
116/*
117 * lc_time localization cache.
118 *
119 * We use only the first 7 or 12 entries of these arrays. The last array
120 * element is left as NULL for the convenience of outside code that wants
121 * to sequentially scan these arrays.
122 */
127
128/* is the database's LC_CTYPE the C locale? */
130
132
133/* indicates whether locale information cache is valid */
134static bool CurrentLocaleConvValid = false;
135static bool CurrentLCTimeValid = false;
136
137/* Cache for collation-related knowledge */
138
139typedef struct
140{
141 Oid collid; /* hash key: pg_collation OID */
142 pg_locale_t locale; /* locale_t struct, or 0 if not valid */
143
144 /* needed for simplehash */
146 char status;
148
149#define SH_PREFIX collation_cache
150#define SH_ELEMENT_TYPE collation_cache_entry
151#define SH_KEY_TYPE Oid
152#define SH_KEY collid
153#define SH_HASH_KEY(tb, key) murmurhash32((uint32) key)
154#define SH_EQUAL(tb, a, b) (a == b)
155#define SH_GET_HASH(tb, a) a->hash
156#define SH_SCOPE static inline
157#define SH_STORE_HASH
158#define SH_DECLARE
159#define SH_DEFINE
160#include "lib/simplehash.h"
161
163static collation_cache_hash *CollationCache = NULL;
164
165/*
166 * The collation cache is often accessed repeatedly for the same collation, so
167 * remember the last one used.
168 */
171
172#if defined(WIN32) && defined(LC_MESSAGES)
173static char *IsoLocaleName(const char *);
174#endif
175
176/*
177 * pg_perm_setlocale
178 *
179 * This wraps the libc function setlocale(), with two additions. First, when
180 * changing LC_CTYPE, update gettext's encoding for the current message
181 * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
182 * not on Windows. Second, if the operation is successful, the corresponding
183 * LC_XXX environment variable is set to match. By setting the environment
184 * variable, we ensure that any subsequent use of setlocale(..., "") will
185 * preserve the settings made through this routine. Of course, LC_ALL must
186 * also be unset to fully ensure that, but that has to be done elsewhere after
187 * all the individual LC_XXX variables have been set correctly. (Thank you
188 * Perl for making this kluge necessary.)
189 */
/*
 * Contract, as implemented below:
 *   category - an LC_xxx constant.  Once setlocale() has succeeded, any
 *              category not handled by the switch is reported with
 *              elog(FATAL).
 *   locale   - locale name handed to setlocale().
 * Returns the installed locale name, or NULL if setlocale() or setenv()
 * failed.  For LC_CTYPE the returned pointer refers to a static buffer
 * owned by this function.
 */
190char *
191pg_perm_setlocale(int category, const char *locale)
192{
193 char *result;
194 const char *envvar;
195
196#ifndef WIN32
197 result = setlocale(category, locale);
198#else
199
200 /*
201 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
202 * the given value is good and set it in the environment variables. We
203 * must ignore attempts to set to "", which means "keep using the old
204 * environment value".
205 */
206#ifdef LC_MESSAGES
207 if (category == LC_MESSAGES)
208 {
209 result = (char *) locale;
/* NULL or "" requests no change: hand it back without touching the environment */
210 if (locale == NULL || locale[0] == '\0')
211 return result;
212 }
213 else
214#endif
215 result = setlocale(category, locale);
216#endif /* WIN32 */
217
218 if (result == NULL)
219 return result; /* fall out immediately on failure */
220
221 /*
222 * Use the right encoding in translated messages. Under ENABLE_NLS, let
223 * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
224 * format strings are ASCII, but database-encoding strings may enter the
225 * message via %s. This makes the overall message encoding equal to the
226 * database encoding.
227 */
228 if (category == LC_CTYPE)
229 {
230 static char save_lc_ctype[LOCALE_NAME_BUFLEN];
231
232 /* copy setlocale() return value before callee invokes it again */
233 strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
234 result = save_lc_ctype;
235
236#ifdef ENABLE_NLS
237 SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
238#else
/* NOTE(review): the !ENABLE_NLS statement is not visible in this listing */
240#endif
241 }
242
/* Select the environment variable that mirrors this locale category. */
243 switch (category)
244 {
245 case LC_COLLATE:
246 envvar = "LC_COLLATE";
247 break;
248 case LC_CTYPE:
249 envvar = "LC_CTYPE";
250 break;
251#ifdef LC_MESSAGES
252 case LC_MESSAGES:
253 envvar = "LC_MESSAGES";
254#ifdef WIN32
/* Export the IsoLocaleName() form when one exists, else the name as given. */
255 result = IsoLocaleName(locale);
256 if (result == NULL)
257 result = (char *) locale;
258 elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
259#endif /* WIN32 */
260 break;
261#endif /* LC_MESSAGES */
262 case LC_MONETARY:
263 envvar = "LC_MONETARY";
264 break;
265 case LC_NUMERIC:
266 envvar = "LC_NUMERIC";
267 break;
268 case LC_TIME:
269 envvar = "LC_TIME";
270 break;
271 default:
272 elog(FATAL, "unrecognized LC category: %d", category);
273 return NULL; /* keep compiler quiet */
274 }
275
/* Record the setting in the environment (overwriting); a setenv() failure is reported as NULL. */
276 if (setenv(envvar, result, 1) != 0)
277 return NULL;
278
279 return result;
280}
281
282
283/*
284 * Is the locale name valid for the locale category?
285 *
286 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
287 * canonical name is stored there. This is especially useful for figuring out
288 * what locale name "" means (ie, the server environment value). (Actually,
289 * it seems that on most implementations that's the only thing it's good for;
290 * we could wish that setlocale gave back a canonically spelled version of
291 * the locale name, but typically it doesn't.)
292 */
/*
 * Arguments and result, as implemented below:
 *   category  - LC_xxx category the name is tested against.
 *   locale    - candidate locale name.
 *   canonname - may be NULL; otherwise receives a palloc'd copy of
 *               setlocale()'s result when the name is accepted.
 * Returns true only if setlocale() accepted the name and neither the name
 * nor the canonical name contains non-ASCII characters.  The previous
 * setting for the category is put back before returning (a WARNING is
 * logged if that fails).
 */
293bool
294check_locale(int category, const char *locale, char **canonname)
295{
296 char *save;
297 char *res;
298
/*
 * NOTE(review): this early return happens before *canonname is reset
 * below, so on this path the caller's *canonname is left untouched.
 */
299 /* Don't let Windows' non-ASCII locale names in. */
300 if (!pg_is_ascii(locale))
301 {
303 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
304 errmsg("locale name \"%s\" contains non-ASCII characters",
305 locale)));
306 return false;
307 }
308
309 if (canonname)
310 *canonname = NULL; /* in case of failure */
311
/* Passing NULL queries the current setting without changing it. */
312 save = setlocale(category, NULL);
313 if (!save)
314 return false; /* won't happen, we hope */
315
316 /* save may be pointing at a modifiable scratch variable, see above. */
317 save = pstrdup(save);
318
319 /* set the locale with setlocale, to see if it accepts it. */
320 res = setlocale(category, locale);
321
322 /* save canonical name if requested. */
323 if (res && canonname)
324 *canonname = pstrdup(res);
325
326 /* restore old value. */
327 if (!setlocale(category, save))
328 elog(WARNING, "failed to restore old locale \"%s\"", save);
329 pfree(save);
330
331 /* Don't let Windows' non-ASCII locale names out. */
332 if (canonname && *canonname && !pg_is_ascii(*canonname))
333 {
335 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
336 errmsg("locale name \"%s\" contains non-ASCII characters",
337 *canonname)));
/* Discard the copy so the caller never sees a rejected canonical name. */
338 pfree(*canonname);
339 *canonname = NULL;
340 return false;
341 }
342
343 return (res != NULL);
344}
345
346
347/*
348 * GUC check/assign hooks
349 *
350 * For most locale categories, the assign hook doesn't actually set the locale
351 * permanently, just reset flags so that the next use will cache the
352 * appropriate values. (See explanation at the top of this file.)
353 *
354 * Note: we accept value = "" as selecting the postmaster's environment
355 * value, whatever it was (so long as the environment setting is legal).
356 * This will have been locked down by an earlier call to pg_perm_setlocale.
357 */
358bool
360{
361 return check_locale(LC_MONETARY, *newval, NULL);
362}
363
364void
365assign_locale_monetary(const char *newval, void *extra)
366{
368}
369
370bool
372{
373 return check_locale(LC_NUMERIC, *newval, NULL);
374}
375
376void
377assign_locale_numeric(const char *newval, void *extra)
378{
380}
381
382bool
384{
385 return check_locale(LC_TIME, *newval, NULL);
386}
387
388void
389assign_locale_time(const char *newval, void *extra)
390{
391 CurrentLCTimeValid = false;
392}
393
394/*
395 * We allow LC_MESSAGES to actually be set globally.
396 *
397 * Note: we normally disallow value = "" because it wouldn't have consistent
398 * semantics (it'd effectively just use the previous value). However, this
399 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
400 * not even if the attempted setting fails due to invalid environment value.
401 * The idea there is just to accept the environment setting *if possible*
402 * during startup, until we can read the proper value from postgresql.conf.
403 */
404bool
406{
407 if (**newval == '\0')
408 {
409 if (source == PGC_S_DEFAULT)
410 return true;
411 else
412 return false;
413 }
414
415 /*
416 * LC_MESSAGES category does not exist everywhere, but accept it anyway
417 *
418 * On Windows, we can't even check the value, so accept blindly
419 */
420#if defined(LC_MESSAGES) && !defined(WIN32)
421 return check_locale(LC_MESSAGES, *newval, NULL);
422#else
423 return true;
424#endif
425}
426
/*
 * GUC assign hook for lc_messages: install the new value process-wide.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
#ifdef LC_MESSAGES

	/*
	 * Not every platform has an LC_MESSAGES category; where it is absent the
	 * setting is accepted and nothing is done.  The result is discarded on
	 * purpose: a failed attempt is tolerated here.
	 */
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
438
439
/*
 * Release the malloc'd string members of a struct lconv.  The struct itself
 * is left alone, as are its scalar members.  It's important that nothing in
 * here can throw elog(ERROR); only free() is called.
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* Keep this list in step with struct_lconv_is_valid(). */
	char	   *owned[] = {
		s->decimal_point,
		s->thousands_sep,
		s->grouping,
		s->int_curr_symbol,
		s->currency_symbol,
		s->mon_decimal_point,
		s->mon_thousands_sep,
		s->mon_grouping,
		s->positive_sign,
		s->negative_sign
	};
	size_t		i;

	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
		free(owned[i]);
}
458
/*
 * Report whether every string member of a struct lconv that we care about
 * is non-NULL.  The members tested here must be the same ones that
 * free_struct_lconv() releases.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
488
489
490/*
491 * Convert the strdup'd string at *str from the specified encoding to the
492 * database encoding.
493 */
494static void
496{
497 char *pstr;
498 char *mstr;
499
500 /* convert the string to the database encoding */
501 pstr = pg_any_to_server(*str, strlen(*str), encoding);
502 if (pstr == *str)
503 return; /* no conversion happened */
504
505 /* need it malloc'd not palloc'd */
506 mstr = strdup(pstr);
507 if (mstr == NULL)
509 (errcode(ERRCODE_OUT_OF_MEMORY),
510 errmsg("out of memory")));
511
512 /* replace old string */
513 free(*str);
514 *str = mstr;
515
516 pfree(pstr);
517}
518
519
520/*
521 * Return the POSIX lconv struct (contains number/money formatting
522 * information) with locale information for all categories.
523 */
524struct lconv *
526{
527 static struct lconv CurrentLocaleConv;
528 static bool CurrentLocaleConvAllocated = false;
529 struct lconv *extlconv;
530 struct lconv tmp;
531 struct lconv worklconv = {0};
532
533 /* Did we do it already? */
535 return &CurrentLocaleConv;
536
537 /* Free any already-allocated storage */
538 if (CurrentLocaleConvAllocated)
539 {
540 free_struct_lconv(&CurrentLocaleConv);
541 CurrentLocaleConvAllocated = false;
542 }
543
544 /*
545 * Use thread-safe method of obtaining a copy of lconv from the operating
546 * system.
547 */
550 &tmp) != 0)
551 elog(ERROR,
552 "could not get lconv for LC_MONETARY = \"%s\", LC_NUMERIC = \"%s\": %m",
554
555 /* Must copy data now so we can re-encode it. */
556 extlconv = &tmp;
557 worklconv.decimal_point = strdup(extlconv->decimal_point);
558 worklconv.thousands_sep = strdup(extlconv->thousands_sep);
559 worklconv.grouping = strdup(extlconv->grouping);
560 worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
561 worklconv.currency_symbol = strdup(extlconv->currency_symbol);
562 worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
563 worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
564 worklconv.mon_grouping = strdup(extlconv->mon_grouping);
565 worklconv.positive_sign = strdup(extlconv->positive_sign);
566 worklconv.negative_sign = strdup(extlconv->negative_sign);
567 /* Copy scalar fields as well */
568 worklconv.int_frac_digits = extlconv->int_frac_digits;
569 worklconv.frac_digits = extlconv->frac_digits;
570 worklconv.p_cs_precedes = extlconv->p_cs_precedes;
571 worklconv.p_sep_by_space = extlconv->p_sep_by_space;
572 worklconv.n_cs_precedes = extlconv->n_cs_precedes;
573 worklconv.n_sep_by_space = extlconv->n_sep_by_space;
574 worklconv.p_sign_posn = extlconv->p_sign_posn;
575 worklconv.n_sign_posn = extlconv->n_sign_posn;
576
577 /* Free the contents of the object populated by pg_localeconv_r(). */
578 pg_localeconv_free(&tmp);
579
580 /* If any of the preceding strdup calls failed, complain now. */
581 if (!struct_lconv_is_valid(&worklconv))
583 (errcode(ERRCODE_OUT_OF_MEMORY),
584 errmsg("out of memory")));
585
586 PG_TRY();
587 {
588 int encoding;
589
590 /*
591 * Now we must perform encoding conversion from whatever's associated
592 * with the locales into the database encoding. If we can't identify
593 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
594 * use PG_SQL_ASCII, which will result in just validating that the
595 * strings are OK in the database encoding.
596 */
598 if (encoding < 0)
600
601 db_encoding_convert(encoding, &worklconv.decimal_point);
602 db_encoding_convert(encoding, &worklconv.thousands_sep);
603 /* grouping is not text and does not require conversion */
604
606 if (encoding < 0)
608
609 db_encoding_convert(encoding, &worklconv.int_curr_symbol);
610 db_encoding_convert(encoding, &worklconv.currency_symbol);
611 db_encoding_convert(encoding, &worklconv.mon_decimal_point);
612 db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
613 /* mon_grouping is not text and does not require conversion */
614 db_encoding_convert(encoding, &worklconv.positive_sign);
615 db_encoding_convert(encoding, &worklconv.negative_sign);
616 }
617 PG_CATCH();
618 {
619 free_struct_lconv(&worklconv);
620 PG_RE_THROW();
621 }
622 PG_END_TRY();
623
624 /*
625 * Everything is good, so save the results.
626 */
627 CurrentLocaleConv = worklconv;
628 CurrentLocaleConvAllocated = true;
630 return &CurrentLocaleConv;
631}
632
633#ifdef WIN32
634/*
635 * On Windows, strftime() returns its output in encoding CP_ACP (the default
636 * operating system codepage for the computer), which is likely different
637 * from SERVER_ENCODING. This is especially important in Japanese versions
638 * of Windows which will use SJIS encoding, which we don't support as a
639 * server encoding.
640 *
641 * So, instead of using strftime(), use wcsftime() to return the value in
642 * wide characters (internally UTF16) and then convert to UTF8, which we
643 * know how to handle directly.
644 *
645 * Note that this only affects the calls to strftime() in this file, which are
646 * used to get the locale-aware strings. Other parts of the backend use
647 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
648 */
649static size_t
650strftime_l_win32(char *dst, size_t dstlen,
651 const char *format, const struct tm *tm, locale_t locale)
652{
653 size_t len;
654 wchar_t wformat[8]; /* formats used below need 3 chars */
655 wchar_t wbuf[MAX_L10N_DATA];
656
657 /*
658 * Get a wchar_t version of the format string. We only actually use
659 * plain-ASCII formats in this file, so we can say that they're UTF8.
660 */
661 len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
662 wformat, lengthof(wformat));
663 if (len == 0)
664 elog(ERROR, "could not convert format string from UTF-8: error code %lu",
665 GetLastError());
666
667 len = _wcsftime_l(wbuf, MAX_L10N_DATA, wformat, tm, locale);
668 if (len == 0)
669 {
670 /*
671 * wcsftime failed, possibly because the result would not fit in
672 * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
673 */
674 return 0;
675 }
676
677 len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
678 NULL, NULL);
679 if (len == 0)
680 elog(ERROR, "could not convert string to UTF-8: error code %lu",
681 GetLastError());
682
683 dst[len] = '\0';
684
685 return len;
686}
687
688/* redefine strftime_l() */
689#define strftime_l(a,b,c,d,e) strftime_l_win32(a,b,c,d,e)
690#endif /* WIN32 */
691
692/*
693 * Subroutine for cache_locale_time().
694 * Convert the given string from encoding "encoding" to the database
695 * encoding, and store the result at *dst, replacing any previous value.
696 */
/*
 *   dst      - address of the cached string pointer to replace; a non-NULL
 *              previous value is pfree'd.
 *   src      - NUL-terminated input string (its length is taken with strlen).
 *   encoding - encoding that src is converted from.
 */
697static void
698cache_single_string(char **dst, const char *src, int encoding)
699{
700 char *ptr;
701 char *olddst;
702
703 /* Convert the string to the database encoding, or validate it's OK */
704 ptr = pg_any_to_server(src, strlen(src), encoding);
705
706 /* Store the string in long-lived storage, replacing any previous value */
707 olddst = *dst;
/* NOTE(review): the statement that stores the new value into *dst is not visible in this listing */
709 if (olddst)
710 pfree(olddst);
711
712 /* Might as well clean up any palloc'd conversion result, too */
/* ptr == src means the conversion call handed back its input, so there is nothing to free */
713 if (ptr != src)
714 pfree(ptr);
715}
716
717/*
718 * Update the lc_time localization cache variables if needed.
719 */
720void
722{
723 char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
724 char *bufptr;
725 time_t timenow;
726 struct tm *timeinfo;
727 struct tm timeinfobuf;
728 bool strftimefail = false;
729 int encoding;
730 int i;
732
733 /* did we do this already? */
735 return;
736
737 elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
738
739 errno = ENOENT;
740#ifdef WIN32
741 locale = _create_locale(LC_ALL, locale_time);
742 if (locale == (locale_t) 0)
743 _dosmaperr(GetLastError());
744#else
745 locale = newlocale(LC_ALL_MASK, locale_time, (locale_t) 0);
746#endif
747 if (!locale)
749
750 /* We use times close to current time as data for strftime(). */
751 timenow = time(NULL);
752 timeinfo = gmtime_r(&timenow, &timeinfobuf);
753
754 /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
755 bufptr = buf;
756
757 /*
758 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
759 * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
760 * error.) An implementation might report errors (e.g. ENOMEM) by
761 * returning 0 (or, less plausibly, a negative value) and setting errno.
762 * Report errno just in case the implementation did that, but clear it in
763 * advance of the calls so we don't emit a stale, unrelated errno.
764 */
765 errno = 0;
766
767 /* localized days */
768 for (i = 0; i < 7; i++)
769 {
770 timeinfo->tm_wday = i;
771 if (strftime_l(bufptr, MAX_L10N_DATA, "%a", timeinfo, locale) <= 0)
772 strftimefail = true;
773 bufptr += MAX_L10N_DATA;
774 if (strftime_l(bufptr, MAX_L10N_DATA, "%A", timeinfo, locale) <= 0)
775 strftimefail = true;
776 bufptr += MAX_L10N_DATA;
777 }
778
779 /* localized months */
780 for (i = 0; i < 12; i++)
781 {
782 timeinfo->tm_mon = i;
783 timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
784 if (strftime_l(bufptr, MAX_L10N_DATA, "%b", timeinfo, locale) <= 0)
785 strftimefail = true;
786 bufptr += MAX_L10N_DATA;
787 if (strftime_l(bufptr, MAX_L10N_DATA, "%B", timeinfo, locale) <= 0)
788 strftimefail = true;
789 bufptr += MAX_L10N_DATA;
790 }
791
792#ifdef WIN32
793 _free_locale(locale);
794#else
795 freelocale(locale);
796#endif
797
798 /*
799 * At this point we've done our best to clean up, and can throw errors, or
800 * call functions that might throw errors, with a clean conscience.
801 */
802 if (strftimefail)
803 elog(ERROR, "strftime_l() failed");
804
805#ifndef WIN32
806
807 /*
808 * As in PGLC_localeconv(), we must convert strftime()'s output from the
809 * encoding implied by LC_TIME to the database encoding. If we can't
810 * identify the LC_TIME encoding, just perform encoding validation.
811 */
813 if (encoding < 0)
815
816#else
817
818 /*
819 * On Windows, strftime_l_win32() always returns UTF8 data, so convert from
820 * that if necessary.
821 */
823
824#endif /* WIN32 */
825
826 bufptr = buf;
827
828 /* localized days */
829 for (i = 0; i < 7; i++)
830 {
832 bufptr += MAX_L10N_DATA;
834 bufptr += MAX_L10N_DATA;
835 }
836 localized_abbrev_days[7] = NULL;
837 localized_full_days[7] = NULL;
838
839 /* localized months */
840 for (i = 0; i < 12; i++)
841 {
843 bufptr += MAX_L10N_DATA;
845 bufptr += MAX_L10N_DATA;
846 }
847 localized_abbrev_months[12] = NULL;
848 localized_full_months[12] = NULL;
849
850 CurrentLCTimeValid = true;
851}
852
853
854#if defined(WIN32) && defined(LC_MESSAGES)
855/*
856 * Convert a Windows setlocale() argument to a Unix-style one.
857 *
858 * Regardless of platform, we install message catalogs under a Unix-style
859 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
860 * following that style will elicit localized interface strings.
861 *
862 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
863 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
864 * case-insensitive. setlocale() returns the fully-qualified form; for
865 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
866 * setlocale() and _create_locale() select a "locale identifier"[1] and store
867 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
868 * ISO 639 language and the ISO 3166 country. Character encoding does not
869 * matter, because the server and client encodings govern that.
870 *
871 * Windows Vista introduced the "locale name" concept[2], closely following
872 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
873 * Studio 2012, setlocale() accepts locale names in addition to the strings it
874 * accepted historically. It does not standardize them; setlocale("Th-tH")
875 * returns "Th-tH". setlocale(category, "") still returns a traditional
876 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
877 * content to carry locale names instead of locale identifiers.
878 *
879 * Visual Studio 2015 should still be able to do the same as Visual Studio
880 * 2012, but the declaration of locale_name is missing in _locale_t, causing
881 * this code compilation to fail, hence this falls back instead on to
882 * enumerating all system locales by using EnumSystemLocalesEx to find the
883 * required locale name. If the input argument is in Unix-style then we can
884 * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as
885 * LOCALE_SNAME.
886 *
887 * This function returns a pointer to a static buffer bearing the converted
888 * name or NULL if conversion fails.
889 *
890 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
891 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
892 */
893
894/*
895 * Callback function for EnumSystemLocalesEx() in get_iso_localename().
896 *
897 * This function enumerates all system locales, searching for one that matches
898 * an input with the format: <Language>[_<Country>], e.g.
899 * English[_United States]
900 *
901 * The input is a three wchar_t array as an LPARAM. The first element is the
902 * locale_name we want to match, the second element is an allocated buffer
903 * where the Unix-style locale is copied if a match is found, and the third
904 * element is the search status, 1 if a match was found, 0 otherwise.
905 */
906static BOOL CALLBACK
907search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
908{
909 wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
910 wchar_t **argv;
911
912 (void) (dwFlags);
913
914 argv = (wchar_t **) lparam;
915 *argv[2] = (wchar_t) 0;
916
917 memset(test_locale, 0, sizeof(test_locale));
918
919 /* Get the name of the <Language> in English */
920 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
921 test_locale, LOCALE_NAME_MAX_LENGTH))
922 {
923 /*
924 * If the enumerated locale does not have a hyphen ("en") OR the
925 * locale_name input does not have an underscore ("English"), we only
926 * need to compare the <Language> tags.
927 */
928 if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
929 {
930 if (_wcsicmp(argv[0], test_locale) == 0)
931 {
932 wcscpy(argv[1], pStr);
933 *argv[2] = (wchar_t) 1;
934 return FALSE;
935 }
936 }
937
938 /*
939 * We have to compare a full <Language>_<Country> tag, so we append
940 * the underscore and name of the country/region in English, e.g.
941 * "English_United States".
942 */
943 else
944 {
945 size_t len;
946
947 wcscat(test_locale, L"_");
948 len = wcslen(test_locale);
949 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
950 test_locale + len,
951 LOCALE_NAME_MAX_LENGTH - len))
952 {
953 if (_wcsicmp(argv[0], test_locale) == 0)
954 {
955 wcscpy(argv[1], pStr);
956 *argv[2] = (wchar_t) 1;
957 return FALSE;
958 }
959 }
960 }
961 }
962
963 return TRUE;
964}
965
966/*
967 * This function converts a Windows locale name to an ISO formatted version
968 * for Visual Studio 2015 or greater.
969 *
970 * Returns NULL, if no valid conversion was found.
971 */
972static char *
973get_iso_localename(const char *winlocname)
974{
975 wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
976 wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
977 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
978 char *period;
979 int len;
980 int ret_val;
981
982 /*
983 * Valid locales have the following syntax:
984 * <Language>[_<Country>[.<CodePage>]]
985 *
986 * GetLocaleInfoEx can only take locale name without code-page and for the
987 * purpose of this API the code-page doesn't matter.
988 */
989 period = strchr(winlocname, '.');
990 if (period != NULL)
991 len = period - winlocname;
992 else
993 len = pg_mbstrlen(winlocname);
994
995 memset(wc_locale_name, 0, sizeof(wc_locale_name));
996 memset(buffer, 0, sizeof(buffer));
997 MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
998 LOCALE_NAME_MAX_LENGTH);
999
1000 /*
1001 * If the lc_messages is already a Unix-style string, we have a direct
1002 * match with LOCALE_SNAME, e.g. en-US, en_US.
1003 */
1004 ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1005 LOCALE_NAME_MAX_LENGTH);
1006 if (!ret_val)
1007 {
1008 /*
1009 * Search for a locale in the system that matches language and country
1010 * name.
1011 */
1012 wchar_t *argv[3];
1013
1014 argv[0] = wc_locale_name;
1015 argv[1] = buffer;
1016 argv[2] = (wchar_t *) &ret_val;
1017 EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1018 NULL);
1019 }
1020
1021 if (ret_val)
1022 {
1023 size_t rc;
1024 char *hyphen;
1025
1026 /* Locale names use only ASCII, any conversion locale suffices. */
1027 rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1028 if (rc == -1 || rc == sizeof(iso_lc_messages))
1029 return NULL;
1030
1031 /*
1032 * Since the message catalogs sit on a case-insensitive filesystem, we
1033 * need not standardize letter case here. So long as we do not ship
1034 * message catalogs for which it would matter, we also need not
1035 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1036 * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1037 */
1038 hyphen = strchr(iso_lc_messages, '-');
1039 if (hyphen)
1040 *hyphen = '_';
1041 return iso_lc_messages;
1042 }
1043
1044 return NULL;
1045}
1046
1047static char *
1048IsoLocaleName(const char *winlocname)
1049{
1050 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1051
1052 if (pg_strcasecmp("c", winlocname) == 0 ||
1053 pg_strcasecmp("posix", winlocname) == 0)
1054 {
1055 strcpy(iso_lc_messages, "C");
1056 return iso_lc_messages;
1057 }
1058 else
1059 return get_iso_localename(winlocname);
1060}
1061
1062#endif /* WIN32 && LC_MESSAGES */
1063
1064/*
1065 * Create a new pg_locale_t struct for the given collation oid.
1066 */
1067static pg_locale_t
1069{
1070 HeapTuple tp;
1071 Form_pg_collation collform;
1072 pg_locale_t result;
1073 Datum datum;
1074 bool isnull;
1075
1076 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1077 if (!HeapTupleIsValid(tp))
1078 elog(ERROR, "cache lookup failed for collation %u", collid);
1079 collform = (Form_pg_collation) GETSTRUCT(tp);
1080
1081 if (collform->collprovider == COLLPROVIDER_BUILTIN)
1082 result = create_pg_locale_builtin(collid, context);
1083 else if (collform->collprovider == COLLPROVIDER_ICU)
1084 result = create_pg_locale_icu(collid, context);
1085 else if (collform->collprovider == COLLPROVIDER_LIBC)
1086 result = create_pg_locale_libc(collid, context);
1087 else
1088 /* shouldn't happen */
1089 PGLOCALE_SUPPORT_ERROR(collform->collprovider);
1090
1091 result->is_default = false;
1092
1093 Assert((result->collate_is_c && result->collate == NULL) ||
1094 (!result->collate_is_c && result->collate != NULL));
1095
1096 datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1097 &isnull);
1098 if (!isnull)
1099 {
1100 char *actual_versionstr;
1101 char *collversionstr;
1102
1103 collversionstr = TextDatumGetCString(datum);
1104
1105 if (collform->collprovider == COLLPROVIDER_LIBC)
1106 datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1107 else
1108 datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1109
1110 actual_versionstr = get_collation_actual_version(collform->collprovider,
1111 TextDatumGetCString(datum));
1112 if (!actual_versionstr)
1113 {
1114 /*
1115 * This could happen when specifying a version in CREATE COLLATION
1116 * but the provider does not support versioning, or manually
1117 * creating a mess in the catalogs.
1118 */
1119 ereport(ERROR,
1120 (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1121 NameStr(collform->collname))));
1122 }
1123
1124 if (strcmp(actual_versionstr, collversionstr) != 0)
1126 (errmsg("collation \"%s\" has version mismatch",
1127 NameStr(collform->collname)),
1128 errdetail("The collation in the database was created using version %s, "
1129 "but the operating system provides version %s.",
1130 collversionstr, actual_versionstr),
1131 errhint("Rebuild all objects affected by this collation and run "
1132 "ALTER COLLATION %s REFRESH VERSION, "
1133 "or build PostgreSQL with the right library version.",
1134 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1135 NameStr(collform->collname)))));
1136 }
1137
1138 ReleaseSysCache(tp);
1139
1140 return result;
1141}
1142
1143/*
1144 * Initialize default_locale with database locale settings.
1145 */
1146void
1148{
1149 HeapTuple tup;
1150 Form_pg_database dbform;
1151 pg_locale_t result;
1152
1153 Assert(default_locale == NULL);
1154
1155 /* Fetch our pg_database row normally, via syscache */
1156 tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1157 if (!HeapTupleIsValid(tup))
1158 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
1159 dbform = (Form_pg_database) GETSTRUCT(tup);
1160
1161 if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
1162 result = create_pg_locale_builtin(DEFAULT_COLLATION_OID,
1164 else if (dbform->datlocprovider == COLLPROVIDER_ICU)
1165 result = create_pg_locale_icu(DEFAULT_COLLATION_OID,
1167 else if (dbform->datlocprovider == COLLPROVIDER_LIBC)
1168 result = create_pg_locale_libc(DEFAULT_COLLATION_OID,
1170 else
1171 /* shouldn't happen */
1172 PGLOCALE_SUPPORT_ERROR(dbform->datlocprovider);
1173
1174 result->is_default = true;
1175 ReleaseSysCache(tup);
1176
1177 default_locale = result;
1178}
1179
1180/*
1181 * Create a pg_locale_t from a collation OID. Results are cached for the
1182 * lifetime of the backend. Thus, do not free the result with freelocale().
1183 *
1184 * For simplicity, we always generate COLLATE + CTYPE even though we
1185 * might only need one of them. Since this is called only once per session,
1186 * it shouldn't cost much.
1187 */
1190{
1191 collation_cache_entry *cache_entry;
1192 bool found;
1193
1194 if (collid == DEFAULT_COLLATION_OID)
1195 return default_locale;
1196
1197 if (!OidIsValid(collid))
1198 elog(ERROR, "cache lookup failed for collation %u", collid);
1199
1201
1204
1205 if (CollationCache == NULL)
1206 {
1208 "collation cache",
1210 CollationCache = collation_cache_create(CollationCacheContext,
1211 16, NULL);
1212 }
1213
1214 cache_entry = collation_cache_insert(CollationCache, collid, &found);
1215 if (!found)
1216 {
1217 /*
1218 * Make sure cache entry is marked invalid, in case we fail before
1219 * setting things.
1220 */
1221 cache_entry->locale = 0;
1222 }
1223
1224 if (cache_entry->locale == 0)
1225 {
1227 }
1228
1230 last_collation_cache_locale = cache_entry->locale;
1231
1232 return cache_entry->locale;
1233}
1234
1235/*
1236 * Get provider-specific collation version string for the given collation from
1237 * the operating system/library.
1238 */
1239char *
1240get_collation_actual_version(char collprovider, const char *collcollate)
1241{
1242 char *collversion = NULL;
1243
1244 if (collprovider == COLLPROVIDER_BUILTIN)
1245 collversion = get_collation_actual_version_builtin(collcollate);
1246#ifdef USE_ICU
1247 else if (collprovider == COLLPROVIDER_ICU)
1248 collversion = get_collation_actual_version_icu(collcollate);
1249#endif
1250 else if (collprovider == COLLPROVIDER_LIBC)
1251 collversion = get_collation_actual_version_libc(collcollate);
1252
1253 return collversion;
1254}
1255
1256size_t
1257pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1259{
1260 if (locale->provider == COLLPROVIDER_BUILTIN)
1261 return strlower_builtin(dst, dstsize, src, srclen, locale);
1262#ifdef USE_ICU
1263 else if (locale->provider == COLLPROVIDER_ICU)
1264 return strlower_icu(dst, dstsize, src, srclen, locale);
1265#endif
1266 else if (locale->provider == COLLPROVIDER_LIBC)
1267 return strlower_libc(dst, dstsize, src, srclen, locale);
1268 else
1269 /* shouldn't happen */
1270 PGLOCALE_SUPPORT_ERROR(locale->provider);
1271
1272 return 0; /* keep compiler quiet */
1273}
1274
1275size_t
1276pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1278{
1279 if (locale->provider == COLLPROVIDER_BUILTIN)
1280 return strtitle_builtin(dst, dstsize, src, srclen, locale);
1281#ifdef USE_ICU
1282 else if (locale->provider == COLLPROVIDER_ICU)
1283 return strtitle_icu(dst, dstsize, src, srclen, locale);
1284#endif
1285 else if (locale->provider == COLLPROVIDER_LIBC)
1286 return strtitle_libc(dst, dstsize, src, srclen, locale);
1287 else
1288 /* shouldn't happen */
1289 PGLOCALE_SUPPORT_ERROR(locale->provider);
1290
1291 return 0; /* keep compiler quiet */
1292}
1293
1294size_t
1295pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1297{
1298 if (locale->provider == COLLPROVIDER_BUILTIN)
1299 return strupper_builtin(dst, dstsize, src, srclen, locale);
1300#ifdef USE_ICU
1301 else if (locale->provider == COLLPROVIDER_ICU)
1302 return strupper_icu(dst, dstsize, src, srclen, locale);
1303#endif
1304 else if (locale->provider == COLLPROVIDER_LIBC)
1305 return strupper_libc(dst, dstsize, src, srclen, locale);
1306 else
1307 /* shouldn't happen */
1308 PGLOCALE_SUPPORT_ERROR(locale->provider);
1309
1310 return 0; /* keep compiler quiet */
1311}
1312
1313size_t
1314pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1316{
1317 if (locale->provider == COLLPROVIDER_BUILTIN)
1318 return strfold_builtin(dst, dstsize, src, srclen, locale);
1319#ifdef USE_ICU
1320 else if (locale->provider == COLLPROVIDER_ICU)
1321 return strfold_icu(dst, dstsize, src, srclen, locale);
1322#endif
1323 /* for libc, just use strlower */
1324 else if (locale->provider == COLLPROVIDER_LIBC)
1325 return strlower_libc(dst, dstsize, src, srclen, locale);
1326 else
1327 /* shouldn't happen */
1328 PGLOCALE_SUPPORT_ERROR(locale->provider);
1329
1330 return 0; /* keep compiler quiet */
1331}
1332
1333/*
1334 * pg_strcoll
1335 *
1336 * Like pg_strncoll for NUL-terminated input strings.
1337 */
1338int
1339pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
1340{
1341 return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
1342}
1343
1344/*
1345 * pg_strncoll
1346 *
1347 * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll_l() or wcscoll_l() as
1348 * appropriate for the given locale, platform, and database encoding. If the
1349 * locale is not specified, use the database collation.
1350 *
1351 * The input strings must be encoded in the database encoding. If an input
1352 * string is NUL-terminated, its length may be specified as -1.
1353 *
1354 * The caller is responsible for breaking ties if the collation is
1355 * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
1356 * easily account for deterministic collations.
1357 */
1358int
1359pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
1361{
1362 return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
1363}
1364
1365/*
1366 * Return true if the collation provider supports pg_strxfrm() and
1367 * pg_strnxfrm(); otherwise false.
1368 *
1369 *
1370 * No similar problem is known for the ICU provider.
1371 */
1372bool
1374{
1375 /*
1376 * locale->collate->strnxfrm is still a required method, even if it may
1377 * have the wrong behavior, because the planner uses it for estimates in
1378 * some cases.
1379 */
1380 return locale->collate->strxfrm_is_safe;
1381}
1382
1383/*
1384 * pg_strxfrm
1385 *
1386 * Like pg_strnxfrm for a NUL-terminated input string.
1387 */
1388size_t
1389pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
1390{
1391 return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
1392}
1393
1394/*
1395 * pg_strnxfrm
1396 *
1397 * Transforms 'src' to a nul-terminated string stored in 'dest' such that
1398 * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
1399 * untransformed strings.
1400 *
1401 * The input string must be encoded in the database encoding. If the input
1402 * string is NUL-terminated, its length may be specified as -1. If 'destsize'
1403 * is zero, 'dest' may be NULL.
1404 *
1405 * Not all providers support pg_strnxfrm() safely. The caller should check
1406 * pg_strxfrm_enabled() first, otherwise this function may return wrong
1407 * results or an error.
1408 *
1409 * Returns the number of bytes needed (or more) to store the transformed
1410 * string, excluding the terminating nul byte. If the value returned is
1411 * 'destsize' or greater, the resulting contents of 'dest' are undefined.
1412 */
1413size_t
1414pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
1416{
1417 return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
1418}
1419
1420/*
1421 * Return true if the collation provider supports pg_strxfrm_prefix() and
1422 * pg_strnxfrm_prefix(); otherwise false.
1423 */
1424bool
1426{
1427 return (locale->collate->strnxfrm_prefix != NULL);
1428}
1429
1430/*
1431 * pg_strxfrm_prefix
1432 *
1433 * Like pg_strnxfrm_prefix for a NUL-terminated input string.
1434 */
1435size_t
1436pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
1438{
1439 return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
1440}
1441
1442/*
1443 * pg_strnxfrm_prefix
1444 *
1445 * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
1446 * memcmp() on the byte sequence is equivalent to pg_strncoll() on
1447 * untransformed strings. The result is not nul-terminated.
1448 *
1449 * The input string must be encoded in the database encoding. If the input
1450 * string is NUL-terminated, its length may be specified as -1.
1451 *
1452 * Not all providers support pg_strnxfrm_prefix() safely. The caller should
1453 * check pg_strxfrm_prefix_enabled() first, otherwise this function may return
1454 * wrong results or an error.
1455 *
1456 * If destsize is not large enough to hold the resulting byte sequence, stores
1457 * only the first destsize bytes in 'dest'. Returns the number of bytes
1458 * actually copied to 'dest'.
1459 */
1460size_t
1461pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
1462 ssize_t srclen, pg_locale_t locale)
1463{
1464 return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
1465}
1466
1467/*
1468 * Return required encoding ID for the given locale, or -1 if any encoding is
1469 * valid for the locale.
1470 */
1471int
1473{
1474 if (strcmp(locale, "C") == 0)
1475 return -1;
1476 else if (strcmp(locale, "C.UTF-8") == 0)
1477 return PG_UTF8;
1478 else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1479 return PG_UTF8;
1480
1481
1482 ereport(ERROR,
1483 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1484 errmsg("invalid locale name \"%s\" for builtin provider",
1485 locale)));
1486
1487 return 0; /* keep compiler quiet */
1488}
1489
1490
1491/*
1492 * Validate the locale and encoding combination, and return the canonical form
1493 * of the locale name.
1494 */
1495const char *
1497{
1498 const char *canonical_name = NULL;
1499 int required_encoding;
1500
1501 if (strcmp(locale, "C") == 0)
1502 canonical_name = "C";
1503 else if (strcmp(locale, "C.UTF-8") == 0 || strcmp(locale, "C.UTF8") == 0)
1504 canonical_name = "C.UTF-8";
1505 else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1506 canonical_name = "PG_UNICODE_FAST";
1507
1508 if (!canonical_name)
1509 ereport(ERROR,
1510 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1511 errmsg("invalid locale name \"%s\" for builtin provider",
1512 locale)));
1513
1514 required_encoding = builtin_locale_encoding(canonical_name);
1515 if (required_encoding >= 0 && encoding != required_encoding)
1516 ereport(ERROR,
1517 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1518 errmsg("encoding \"%s\" does not match locale \"%s\"",
1520
1521 return canonical_name;
1522}
1523
1524
1525
1526/*
1527 * Return the BCP47 language tag representation of the requested locale.
1528 *
1529 * This function should be called before passing the string to ucol_open(),
1530 * because conversion to a language tag also performs "level 2
1531 * canonicalization". In addition to producing a consistent format, level 2
1532 * canonicalization is able to more accurately interpret different input
1533 * locale string formats, such as POSIX and .NET IDs.
1534 */
1535char *
1536icu_language_tag(const char *loc_str, int elevel)
1537{
1538#ifdef USE_ICU
1539 UErrorCode status;
1540 char *langtag;
1541 size_t buflen = 32; /* arbitrary starting buffer size */
1542 const bool strict = true;
1543
1544 /*
1545 * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
1546 * RFC5646 section 4.4). Additionally, in older ICU versions,
1547 * uloc_toLanguageTag() doesn't always return the ultimate length on the
1548 * first call, necessitating a loop.
1549 */
1550 langtag = palloc(buflen);
1551 while (true)
1552 {
1553 status = U_ZERO_ERROR;
1554 uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
1555
1556 /* try again if the buffer is not large enough */
1557 if ((status == U_BUFFER_OVERFLOW_ERROR ||
1558 status == U_STRING_NOT_TERMINATED_WARNING) &&
1559 buflen < MaxAllocSize)
1560 {
1561 buflen = Min(buflen * 2, MaxAllocSize);
1562 langtag = repalloc(langtag, buflen);
1563 continue;
1564 }
1565
1566 break;
1567 }
1568
1569 if (U_FAILURE(status))
1570 {
1571 pfree(langtag);
1572
1573 if (elevel > 0)
1574 ereport(elevel,
1575 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1576 errmsg("could not convert locale name \"%s\" to language tag: %s",
1577 loc_str, u_errorName(status))));
1578 return NULL;
1579 }
1580
1581 return langtag;
1582#else /* not USE_ICU */
1583 ereport(ERROR,
1584 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1585 errmsg("ICU is not supported in this build")));
1586 return NULL; /* keep compiler quiet */
1587#endif /* not USE_ICU */
1588}
1589
1590/*
1591 * Perform best-effort check that the locale is a valid one.
1592 */
1593void
1594icu_validate_locale(const char *loc_str)
1595{
1596#ifdef USE_ICU
1597 UCollator *collator;
1598 UErrorCode status;
1599 char lang[ULOC_LANG_CAPACITY];
1600 bool found = false;
1601 int elevel = icu_validation_level;
1602
1603 /* no validation */
1604 if (elevel < 0)
1605 return;
1606
1607 /* downgrade to WARNING during pg_upgrade */
1608 if (IsBinaryUpgrade && elevel > WARNING)
1609 elevel = WARNING;
1610
1611 /* validate that we can extract the language */
1612 status = U_ZERO_ERROR;
1613 uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
1614 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1615 {
1616 ereport(elevel,
1617 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1618 errmsg("could not get language from ICU locale \"%s\": %s",
1619 loc_str, u_errorName(status)),
1620 errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1621 "icu_validation_level", "disabled")));
1622 return;
1623 }
1624
1625 /* check for special language name */
1626 if (strcmp(lang, "") == 0 ||
1627 strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
1628 found = true;
1629
1630 /* search for matching language within ICU */
1631 for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
1632 {
1633 const char *otherloc = uloc_getAvailable(i);
1634 char otherlang[ULOC_LANG_CAPACITY];
1635
1636 status = U_ZERO_ERROR;
1637 uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
1638 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1639 continue;
1640
1641 if (strcmp(lang, otherlang) == 0)
1642 found = true;
1643 }
1644
1645 if (!found)
1646 ereport(elevel,
1647 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1648 errmsg("ICU locale \"%s\" has unknown language \"%s\"",
1649 loc_str, lang),
1650 errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1651 "icu_validation_level", "disabled")));
1652
1653 /* check that it can be opened */
1654 collator = pg_ucol_open(loc_str);
1655 ucol_close(collator);
1656#else /* not USE_ICU */
1657 /* could get here if a collation was created by a build with ICU */
1658 ereport(ERROR,
1659 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1660 errmsg("ICU is not supported in this build")));
1661#endif /* not USE_ICU */
1662}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:717
#define Min(x, y)
Definition: c.h:975
uint32_t uint32
Definition: c.h:502
#define lengthof(array)
Definition: c.h:759
#define OidIsValid(objectId)
Definition: c.h:746
Oid collid
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define PG_RE_THROW()
Definition: elog.h:404
#define DEBUG3
Definition: elog.h:28
#define FATAL
Definition: elog.h:41
#define PG_TRY(...)
Definition: elog.h:371
#define WARNING
Definition: elog.h:36
#define PG_END_TRY(...)
Definition: elog.h:396
#define ERROR
Definition: elog.h:39
#define PG_CATCH(...)
Definition: elog.h:381
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define MaxAllocSize
Definition: fe_memutils.h:22
bool IsBinaryUpgrade
Definition: globals.c:121
Oid MyDatabaseId
Definition: globals.c:94
#define newval
GucSource
Definition: guc.h:112
@ PGC_S_DEFAULT
Definition: guc.h:113
Assert(PointerIsAligned(start, uint64))
const char * str
#define free(a)
Definition: header.h:65
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
#define period
Definition: indent_codes.h:66
static char * locale
Definition: initdb.c:140
int i
Definition: isn.c:77
static struct pg_tm tm
Definition: localtime.c:104
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3506
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1038
void SetMessageEncoding(int encoding)
Definition: mbutils.c:1172
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1690
char * pstrdup(const char *in)
Definition: mcxt.c:1703
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1548
void pfree(void *pointer)
Definition: mcxt.c:1528
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void * palloc(Size size)
Definition: mcxt.c:1321
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static char format
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:58
const void size_t len
FormData_pg_database * Form_pg_database
Definition: pg_database.h:96
int32 encoding
Definition: pg_database.h:41
size_t strfold_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
int icu_validation_level
Definition: pg_locale.c:114
static pg_locale_t last_collation_cache_locale
Definition: pg_locale.c:170
void cache_locale_time(void)
Definition: pg_locale.c:721
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1414
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:1373
char * localized_full_months[12+1]
Definition: pg_locale.c:126
void icu_validate_locale(const char *loc_str)
Definition: pg_locale.c:1594
pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context)
static bool CurrentLCTimeValid
Definition: pg_locale.c:135
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:389
char * get_collation_actual_version(char collprovider, const char *collcollate)
Definition: pg_locale.c:1240
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
size_t strupper_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:383
char * locale_messages
Definition: pg_locale.c:109
size_t strlower_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strtitle_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strupper_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * locale_numeric
Definition: pg_locale.c:111
size_t strtitle_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1189
size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1314
int builtin_locale_encoding(const char *locale)
Definition: pg_locale.c:1472
size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1461
size_t strupper_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
bool database_ctype_is_c
Definition: pg_locale.c:129
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:191
#define PGLOCALE_SUPPORT_ERROR(provider)
Definition: pg_locale.c:57
static pg_locale_t create_pg_locale(Oid collid, MemoryContext context)
Definition: pg_locale.c:1068
char * locale_time
Definition: pg_locale.c:112
static void cache_single_string(char **dst, const char *src, int encoding)
Definition: pg_locale.c:698
size_t strlower_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_libc(const char *collcollate)
size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1257
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:371
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:495
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:377
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:405
#define MAX_L10N_DATA
Definition: pg_locale.c:66
size_t strlower_libc(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_builtin(const char *collcollate)
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:445
static MemoryContext CollationCacheContext
Definition: pg_locale.c:162
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:428
static bool CurrentLocaleConvValid
Definition: pg_locale.c:134
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:525
pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context)
size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1276
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1339
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:1425
char * icu_language_tag(const char *loc_str, int elevel)
Definition: pg_locale.c:1536
char * localized_abbrev_months[12+1]
Definition: pg_locale.c:125
static pg_locale_t default_locale
Definition: pg_locale.c:131
static collation_cache_hash * CollationCache
Definition: pg_locale.c:163
int pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.c:1359
size_t strfold_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:464
void init_database_collation(void)
Definition: pg_locale.c:1147
char * localized_full_days[7+1]
Definition: pg_locale.c:124
size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1389
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1496
size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1295
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:365
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:294
char * localized_abbrev_days[7+1]
Definition: pg_locale.c:123
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1436
char * locale_monetary
Definition: pg_locale.c:110
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:359
static Oid last_collation_cache_oid
Definition: pg_locale.c:169
#define LOCALE_NAME_BUFLEN
Definition: pg_locale.h:20
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
void report_newlocale_failure(const char *localename)
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
#define pg_encoding_to_char
Definition: pg_wchar.h:630
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
int pg_localeconv_r(const char *lc_monetary, const char *lc_numeric, struct lconv *output)
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:301
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void pg_localeconv_free(struct lconv *lconv)
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
static void AssertCouldGetRelation(void)
Definition: relcache.h:44
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:13113
bool pg_is_ascii(const char *str)
Definition: string.c:132
Definition: pg_locale.c:140
char status
Definition: pg_locale.c:146
Oid collid
Definition: pg_locale.c:141
pg_locale_t locale
Definition: pg_locale.c:142
uint32 hash
Definition: pg_locale.c:145
const struct collate_methods * collate
Definition: pg_locale.h:104
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:600
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:631
#define locale_t
Definition: win32_port.h:432
void _dosmaperr(unsigned long)
Definition: win32error.c:177
#define setenv(x, y, z)
Definition: win32_port.h:545
#define setlocale(a, b)
Definition: win32_port.h:475