@@ -1030,3 +1030,176 @@ pg_newlocale_from_collation(Oid collid)
1030
1030
1031
1031
return cache_entry -> locale ;
1032
1032
}
1033
+
1034
+
1035
+ /*
1036
+ * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1037
+ * Therefore we keep them here rather than with the mbutils code.
1038
+ */
1039
+
1040
+ #ifdef USE_WIDE_UPPER_LOWER
1041
+
1042
+ /*
1043
+ * wchar2char --- convert wide characters to multibyte format
1044
+ *
1045
+ * This has the same API as the standard wcstombs_l() function; in particular,
1046
+ * tolen is the maximum number of bytes to store at *to, and *from must be
1047
+ * zero-terminated. The output will be zero-terminated iff there is room.
1048
+ */
1049
+ size_t
1050
+ wchar2char (char * to , const wchar_t * from , size_t tolen , pg_locale_t locale )
1051
+ {
1052
+ size_t result ;
1053
+
1054
+ if (tolen == 0 )
1055
+ return 0 ;
1056
+
1057
+ #ifdef WIN32
1058
+
1059
+ /*
1060
+ * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1061
+ * for some reason mbstowcs and wcstombs won't do this for us, so we use
1062
+ * MultiByteToWideChar().
1063
+ */
1064
+ if (GetDatabaseEncoding () == PG_UTF8 )
1065
+ {
1066
+ result = WideCharToMultiByte (CP_UTF8 , 0 , from , -1 , to , tolen ,
1067
+ NULL , NULL );
1068
+ /* A zero return is failure */
1069
+ if (result <= 0 )
1070
+ result = -1 ;
1071
+ else
1072
+ {
1073
+ Assert (result <= tolen );
1074
+ /* Microsoft counts the zero terminator in the result */
1075
+ result -- ;
1076
+ }
1077
+ }
1078
+ else
1079
+ #endif /* WIN32 */
1080
+ if (locale == (pg_locale_t ) 0 )
1081
+ {
1082
+ /* Use wcstombs directly for the default locale */
1083
+ result = wcstombs (to , from , tolen );
1084
+ }
1085
+ else
1086
+ {
1087
+ #ifdef HAVE_LOCALE_T
1088
+ #ifdef HAVE_WCSTOMBS_L
1089
+ /* Use wcstombs_l for nondefault locales */
1090
+ result = wcstombs_l (to , from , tolen , locale );
1091
+ #else /* !HAVE_WCSTOMBS_L */
1092
+ /* We have to temporarily set the locale as current ... ugh */
1093
+ locale_t save_locale = uselocale (locale );
1094
+
1095
+ result = wcstombs (to , from , tolen );
1096
+
1097
+ uselocale (save_locale );
1098
+ #endif /* HAVE_WCSTOMBS_L */
1099
+ #else /* !HAVE_LOCALE_T */
1100
+ /* Can't have locale != 0 without HAVE_LOCALE_T */
1101
+ elog (ERROR , "wcstombs_l is not available" );
1102
+ result = 0 ; /* keep compiler quiet */
1103
+ #endif /* HAVE_LOCALE_T */
1104
+ }
1105
+
1106
+ return result ;
1107
+ }
1108
+
1109
+ /*
1110
+ * char2wchar --- convert multibyte characters to wide characters
1111
+ *
1112
+ * This has almost the API of mbstowcs_l(), except that *from need not be
1113
+ * null-terminated; instead, the number of input bytes is specified as
1114
+ * fromlen. Also, we ereport() rather than returning -1 for invalid
1115
+ * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1116
+ * The output will be zero-terminated iff there is room.
1117
+ */
1118
+ size_t
1119
+ char2wchar (wchar_t * to , size_t tolen , const char * from , size_t fromlen ,
1120
+ pg_locale_t locale )
1121
+ {
1122
+ size_t result ;
1123
+
1124
+ if (tolen == 0 )
1125
+ return 0 ;
1126
+
1127
+ #ifdef WIN32
1128
+ /* See WIN32 "Unicode" comment above */
1129
+ if (GetDatabaseEncoding () == PG_UTF8 )
1130
+ {
1131
+ /* Win32 API does not work for zero-length input */
1132
+ if (fromlen == 0 )
1133
+ result = 0 ;
1134
+ else
1135
+ {
1136
+ result = MultiByteToWideChar (CP_UTF8 , 0 , from , fromlen , to , tolen - 1 );
1137
+ /* A zero return is failure */
1138
+ if (result == 0 )
1139
+ result = -1 ;
1140
+ }
1141
+
1142
+ if (result != -1 )
1143
+ {
1144
+ Assert (result < tolen );
1145
+ /* Append trailing null wchar (MultiByteToWideChar() does not) */
1146
+ to [result ] = 0 ;
1147
+ }
1148
+ }
1149
+ else
1150
+ #endif /* WIN32 */
1151
+ {
1152
+ /* mbstowcs requires ending '\0' */
1153
+ char * str = pnstrdup (from , fromlen );
1154
+
1155
+ if (locale == (pg_locale_t ) 0 )
1156
+ {
1157
+ /* Use mbstowcs directly for the default locale */
1158
+ result = mbstowcs (to , str , tolen );
1159
+ }
1160
+ else
1161
+ {
1162
+ #ifdef HAVE_LOCALE_T
1163
+ #ifdef HAVE_WCSTOMBS_L
1164
+ /* Use mbstowcs_l for nondefault locales */
1165
+ result = mbstowcs_l (to , str , tolen , locale );
1166
+ #else /* !HAVE_WCSTOMBS_L */
1167
+ /* We have to temporarily set the locale as current ... ugh */
1168
+ locale_t save_locale = uselocale (locale );
1169
+
1170
+ result = mbstowcs (to , str , tolen );
1171
+
1172
+ uselocale (save_locale );
1173
+ #endif /* HAVE_WCSTOMBS_L */
1174
+ #else /* !HAVE_LOCALE_T */
1175
+ /* Can't have locale != 0 without HAVE_LOCALE_T */
1176
+ elog (ERROR , "mbstowcs_l is not available" );
1177
+ result = 0 ; /* keep compiler quiet */
1178
+ #endif /* HAVE_LOCALE_T */
1179
+ }
1180
+
1181
+ pfree (str );
1182
+ }
1183
+
1184
+ if (result == -1 )
1185
+ {
1186
+ /*
1187
+ * Invalid multibyte character encountered. We try to give a useful
1188
+ * error message by letting pg_verifymbstr check the string. But it's
1189
+ * possible that the string is OK to us, and not OK to mbstowcs ---
1190
+ * this suggests that the LC_CTYPE locale is different from the
1191
+ * database encoding. Give a generic error message if verifymbstr
1192
+ * can't find anything wrong.
1193
+ */
1194
+ pg_verifymbstr (from , fromlen , false); /* might not return */
1195
+ /* but if it does ... */
1196
+ ereport (ERROR ,
1197
+ (errcode (ERRCODE_CHARACTER_NOT_IN_REPERTOIRE ),
1198
+ errmsg ("invalid multibyte character for locale" ),
1199
+ errhint ("The server's LC_CTYPE locale is probably incompatible with the database encoding." )));
1200
+ }
1201
+
1202
+ return result ;
1203
+ }
1204
+
1205
+ #endif /* USE_WIDE_UPPER_LOWER */
0 commit comments