Skip to content

Commit ea1b99a

Browse files
committed
Add 'noError' argument to encoding conversion functions.
With the 'noError' argument, you can try to convert a buffer without knowing the character boundaries beforehand. The functions now need to return the number of input bytes successfully converted. This is a backwards-incompatible change, if you have created a custom encoding conversion with CREATE CONVERSION. This adds a check to pg_upgrade for that, refusing the upgrade if there are any user-defined encoding conversions. Custom conversions are very rare; there are no commonly used extensions that I know of that use that feature. No other objects can depend on conversions, so if you do have one, you can fairly easily drop it before upgrading, and recreate it after the upgrade with an updated version. Add regression tests for built-in encoding conversions. This doesn't cover every conversion, but it covers all the internal functions in conv.c that are used to implement the conversions. Reviewed-by: John Naylor Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01%40iki.fi
1 parent e2639a7 commit ea1b99a

File tree

40 files changed

+2333
-631
lines changed

40 files changed

+2333
-631
lines changed

doc/src/sgml/ref/create_conversion.sgml

+9-3
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,15 @@ conv_proc(
117117
integer, -- destination encoding ID
118118
cstring, -- source string (null terminated C string)
119119
internal, -- destination (fill with a null terminated C string)
120-
integer -- source string length
121-
) RETURNS void;
122-
</programlisting></para>
120+
integer, -- source string length
121+
boolean -- if true, don't throw an error if conversion fails
122+
) RETURNS integer;
123+
</programlisting>
124+
The return value is the number of source bytes that were successfully
125+
converted. If the last argument is false, the function must throw an
126+
error on invalid input, and the return value is always equal to the
127+
source string length.
128+
</para>
123129
</listitem>
124130
</varlistentry>
125131
</variablelist>

src/backend/commands/conversioncmds.c

+22-10
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ CreateConversionCommand(CreateConversionStmt *stmt)
4545
const char *from_encoding_name = stmt->for_encoding_name;
4646
const char *to_encoding_name = stmt->to_encoding_name;
4747
List *func_name = stmt->func_name;
48-
static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID};
48+
static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID};
4949
char result[1];
50+
Datum funcresult;
5051

5152
/* Convert list of names to a name and namespace */
5253
namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name,
@@ -92,12 +93,12 @@ CreateConversionCommand(CreateConversionStmt *stmt)
9293
funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid),
9394
funcargs, false);
9495

95-
/* Check it returns VOID, else it's probably the wrong function */
96-
if (get_func_rettype(funcoid) != VOIDOID)
96+
/* Check it returns int4, else it's probably the wrong function */
97+
if (get_func_rettype(funcoid) != INT4OID)
9798
ereport(ERROR,
9899
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
99100
errmsg("encoding conversion function %s must return type %s",
100-
NameListToString(func_name), "void")));
101+
NameListToString(func_name), "integer")));
101102

102103
/* Check we have EXECUTE rights for the function */
103104
aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
@@ -111,12 +112,23 @@ CreateConversionCommand(CreateConversionStmt *stmt)
111112
* string; the conversion function should throw an error if it can't
112113
* perform the requested conversion.
113114
*/
114-
OidFunctionCall5(funcoid,
115-
Int32GetDatum(from_encoding),
116-
Int32GetDatum(to_encoding),
117-
CStringGetDatum(""),
118-
CStringGetDatum(result),
119-
Int32GetDatum(0));
115+
funcresult = OidFunctionCall6(funcoid,
116+
Int32GetDatum(from_encoding),
117+
Int32GetDatum(to_encoding),
118+
CStringGetDatum(""),
119+
CStringGetDatum(result),
120+
Int32GetDatum(0),
121+
BoolGetDatum(false));
122+
123+
/*
124+
* The function should return 0 for empty input. Might as well check that,
125+
* too.
126+
*/
127+
if (DatumGetInt32(funcresult) != 0)
128+
ereport(ERROR,
129+
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
130+
errmsg("encoding conversion function %s returned incorrect result for empty input",
131+
NameListToString(func_name))));
120132

121133
/*
122134
* All seem ok, go ahead (possible failure would be a duplicate conversion

src/backend/utils/error/elog.c

+2
Original file line numberDiff line numberDiff line change
@@ -2271,6 +2271,8 @@ write_console(const char *line, int len)
22712271
* Conversion on non-win32 platforms is not implemented yet. It requires
22722272
* non-throw version of pg_do_encoding_conversion(), that converts
22732273
* unconvertable characters to '?' without errors.
2274+
*
2275+
* XXX: We have a no-throw version now. It doesn't convert to '?' though.
22742276
*/
22752277
#endif
22762278

0 commit comments

Comments
 (0)