Skip to content

Commit c60c9ba

Browse files
committed
Convert json_in and jsonb_in to report errors softly.
This requires a bit of further infrastructure-extension to allow trapping errors reported by numeric_in and pg_unicode_to_server, but otherwise it's pretty straightforward.

In the case of jsonb_in, we are only capturing errors reported during the initial "parse" phase. The value-construction phase (JsonbValueToJsonb) can also throw errors if assorted implementation limits are exceeded. We should improve that, but it seems like a separable project.

Andrew Dunstan and Tom Lane

Discussion: https://postgr.es/m/3bac9841-fe07-713d-fa42-606c225567d6@dunslane.net
1 parent 50428a3 commit c60c9ba

File tree

17 files changed

+282
-46
lines changed

17 files changed

+282
-46
lines changed

src/backend/utils/adt/json.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ json_in(PG_FUNCTION_ARGS)
8181

8282
/* validate it */
8383
lex = makeJsonLexContext(result, false);
84-
pg_parse_json_or_ereport(lex, &nullSemAction);
84+
if (!pg_parse_json_or_errsave(lex, &nullSemAction, fcinfo->context))
85+
PG_RETURN_NULL();
8586

86-
/* Internal representation is the same as text, for now */
87+
/* Internal representation is the same as text */
8788
PG_RETURN_TEXT_P(result);
8889
}
8990

@@ -1337,7 +1338,7 @@ json_typeof(PG_FUNCTION_ARGS)
13371338
/* Lex exactly one token from the input and check its type. */
13381339
result = json_lex(lex);
13391340
if (result != JSON_SUCCESS)
1340-
json_ereport_error(result, lex);
1341+
json_errsave_error(result, lex, NULL);
13411342
tok = lex->token_type;
13421343
switch (tok)
13431344
{

src/backend/utils/adt/jsonb.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ typedef struct JsonbInState
3333
{
3434
JsonbParseState *parseState;
3535
JsonbValue *res;
36+
Node *escontext;
3637
} JsonbInState;
3738

3839
/* unlike with json categories, we need to treat json and jsonb differently */
@@ -61,8 +62,8 @@ typedef struct JsonbAggState
6162
Oid val_output_func;
6263
} JsonbAggState;
6364

64-
static inline Datum jsonb_from_cstring(char *json, int len);
65-
static size_t checkStringLen(size_t len);
65+
static inline Datum jsonb_from_cstring(char *json, int len, Node *escontext);
66+
static bool checkStringLen(size_t len, Node *escontext);
6667
static JsonParseErrorType jsonb_in_object_start(void *pstate);
6768
static JsonParseErrorType jsonb_in_object_end(void *pstate);
6869
static JsonParseErrorType jsonb_in_array_start(void *pstate);
@@ -98,7 +99,7 @@ jsonb_in(PG_FUNCTION_ARGS)
9899
{
99100
char *json = PG_GETARG_CSTRING(0);
100101

101-
return jsonb_from_cstring(json, strlen(json));
102+
return jsonb_from_cstring(json, strlen(json), fcinfo->context);
102103
}
103104

104105
/*
@@ -122,7 +123,7 @@ jsonb_recv(PG_FUNCTION_ARGS)
122123
else
123124
elog(ERROR, "unsupported jsonb version number %d", version);
124125

125-
return jsonb_from_cstring(str, nbytes);
126+
return jsonb_from_cstring(str, nbytes, NULL);
126127
}
127128

128129
/*
@@ -251,9 +252,12 @@ jsonb_typeof(PG_FUNCTION_ARGS)
251252
* Turns json string into a jsonb Datum.
252253
*
253254
* Uses the json parser (with hooks) to construct a jsonb.
255+
*
256+
* If escontext points to an ErrorSaveContext, errors are reported there
257+
* instead of being thrown.
254258
*/
255259
static inline Datum
256-
jsonb_from_cstring(char *json, int len)
260+
jsonb_from_cstring(char *json, int len, Node *escontext)
257261
{
258262
JsonLexContext *lex;
259263
JsonbInState state;
@@ -263,6 +267,7 @@ jsonb_from_cstring(char *json, int len)
263267
memset(&sem, 0, sizeof(sem));
264268
lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
265269

270+
state.escontext = escontext;
266271
sem.semstate = (void *) &state;
267272

268273
sem.object_start = jsonb_in_object_start;
@@ -272,23 +277,24 @@ jsonb_from_cstring(char *json, int len)
272277
sem.scalar = jsonb_in_scalar;
273278
sem.object_field_start = jsonb_in_object_field_start;
274279

275-
pg_parse_json_or_ereport(lex, &sem);
280+
if (!pg_parse_json_or_errsave(lex, &sem, escontext))
281+
return (Datum) 0;
276282

277283
/* after parsing, the item member has the composed jsonb structure */
278284
PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
279285
}
280286

281-
static size_t
282-
checkStringLen(size_t len)
287+
static bool
288+
checkStringLen(size_t len, Node *escontext)
283289
{
284290
if (len > JENTRY_OFFLENMASK)
285-
ereport(ERROR,
291+
ereturn(escontext, false,
286292
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
287293
errmsg("string too long to represent as jsonb string"),
288294
errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
289295
JENTRY_OFFLENMASK)));
290296

291-
return len;
297+
return true;
292298
}
293299

294300
static JsonParseErrorType
@@ -339,7 +345,9 @@ jsonb_in_object_field_start(void *pstate, char *fname, bool isnull)
339345

340346
Assert(fname != NULL);
341347
v.type = jbvString;
342-
v.val.string.len = checkStringLen(strlen(fname));
348+
v.val.string.len = strlen(fname);
349+
if (!checkStringLen(v.val.string.len, _state->escontext))
350+
return JSON_SEM_ACTION_FAILED;
343351
v.val.string.val = fname;
344352

345353
_state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v);
@@ -390,7 +398,9 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
390398
case JSON_TOKEN_STRING:
391399
Assert(token != NULL);
392400
v.type = jbvString;
393-
v.val.string.len = checkStringLen(strlen(token));
401+
v.val.string.len = strlen(token);
402+
if (!checkStringLen(v.val.string.len, _state->escontext))
403+
return JSON_SEM_ACTION_FAILED;
394404
v.val.string.val = token;
395405
break;
396406
case JSON_TOKEN_NUMBER:
@@ -401,10 +411,11 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
401411
*/
402412
Assert(token != NULL);
403413
v.type = jbvNumeric;
404-
numd = DirectFunctionCall3(numeric_in,
405-
CStringGetDatum(token),
406-
ObjectIdGetDatum(InvalidOid),
407-
Int32GetDatum(-1));
414+
if (!DirectInputFunctionCallSafe(numeric_in, token,
415+
InvalidOid, -1,
416+
_state->escontext,
417+
&numd))
418+
return JSON_SEM_ACTION_FAILED;
408419
v.val.numeric = DatumGetNumeric(numd);
409420
break;
410421
case JSON_TOKEN_TRUE:
@@ -738,6 +749,9 @@ jsonb_categorize_type(Oid typoid,
738749
*
739750
* If key_scalar is true, the value is stored as a key, so insist
740751
* it's of an acceptable type, and force it to be a jbvString.
752+
*
753+
* Note: currently, we assume that result->escontext is NULL and errors
754+
* will be thrown.
741755
*/
742756
static void
743757
datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
@@ -910,7 +924,8 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
910924
default:
911925
outputstr = OidOutputFunctionCall(outfuncoid, val);
912926
jb.type = jbvString;
913-
jb.val.string.len = checkStringLen(strlen(outputstr));
927+
jb.val.string.len = strlen(outputstr);
928+
(void) checkStringLen(jb.val.string.len, NULL);
914929
jb.val.string.val = outputstr;
915930
break;
916931
}
@@ -1648,6 +1663,7 @@ jsonb_agg_finalfn(PG_FUNCTION_ARGS)
16481663
* shallow clone is sufficient as we aren't going to change any of the
16491664
* values, just add the final array end marker.
16501665
*/
1666+
memset(&result, 0, sizeof(JsonbInState));
16511667

16521668
result.parseState = clone_parse_state(arg->res->parseState);
16531669

@@ -1880,6 +1896,7 @@ jsonb_object_agg_finalfn(PG_FUNCTION_ARGS)
18801896
* going to change any of the values, just add the final object end
18811897
* marker.
18821898
*/
1899+
memset(&result, 0, sizeof(JsonbInState));
18831900

18841901
result.parseState = clone_parse_state(arg->res->parseState);
18851902

src/backend/utils/adt/jsonfuncs.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "lib/stringinfo.h"
2626
#include "mb/pg_wchar.h"
2727
#include "miscadmin.h"
28+
#include "nodes/miscnodes.h"
2829
#include "utils/array.h"
2930
#include "utils/builtins.h"
3031
#include "utils/fmgroids.h"
@@ -490,21 +491,31 @@ static JsonParseErrorType transform_string_values_object_field_start(void *state
490491
static JsonParseErrorType transform_string_values_array_element_start(void *state, bool isnull);
491492
static JsonParseErrorType transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
492493

494+
493495
/*
494-
* pg_parse_json_or_ereport
496+
* pg_parse_json_or_errsave
495497
*
496498
* This function is like pg_parse_json, except that it does not return a
497499
* JsonParseErrorType. Instead, in case of any failure, this function will
500+
* save error data into *escontext if that's an ErrorSaveContext, otherwise
498501
* ereport(ERROR).
502+
*
503+
* Returns a boolean indicating success or failure (failure will only be
504+
* returned when escontext is an ErrorSaveContext).
499505
*/
500-
void
501-
pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
506+
bool
507+
pg_parse_json_or_errsave(JsonLexContext *lex, JsonSemAction *sem,
508+
Node *escontext)
502509
{
503510
JsonParseErrorType result;
504511

505512
result = pg_parse_json(lex, sem);
506513
if (result != JSON_SUCCESS)
507-
json_ereport_error(result, lex);
514+
{
515+
json_errsave_error(result, lex, escontext);
516+
return false;
517+
}
518+
return true;
508519
}
509520

510521
/*
@@ -608,17 +619,25 @@ jsonb_object_keys(PG_FUNCTION_ARGS)
608619
* Report a JSON error.
609620
*/
610621
void
611-
json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
622+
json_errsave_error(JsonParseErrorType error, JsonLexContext *lex,
623+
Node *escontext)
612624
{
613625
if (error == JSON_UNICODE_HIGH_ESCAPE ||
626+
error == JSON_UNICODE_UNTRANSLATABLE ||
614627
error == JSON_UNICODE_CODE_POINT_ZERO)
615-
ereport(ERROR,
628+
errsave(escontext,
616629
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
617630
errmsg("unsupported Unicode escape sequence"),
618631
errdetail_internal("%s", json_errdetail(error, lex)),
619632
report_json_context(lex)));
633+
else if (error == JSON_SEM_ACTION_FAILED)
634+
{
635+
/* semantic action function had better have reported something */
636+
if (!SOFT_ERROR_OCCURRED(escontext))
637+
elog(ERROR, "JSON semantic action function did not provide error information");
638+
}
620639
else
621-
ereport(ERROR,
640+
errsave(escontext,
622641
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
623642
errmsg("invalid input syntax for type %s", "json"),
624643
errdetail_internal("%s", json_errdetail(error, lex)),
@@ -1274,7 +1293,7 @@ get_array_start(void *state)
12741293

12751294
error = json_count_array_elements(_state->lex, &nelements);
12761295
if (error != JSON_SUCCESS)
1277-
json_ereport_error(error, _state->lex);
1296+
json_errsave_error(error, _state->lex, NULL);
12781297

12791298
if (-_state->path_indexes[lex_level] <= nelements)
12801299
_state->path_indexes[lex_level] += nelements;

src/backend/utils/fmgr/fmgr.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,6 +1614,51 @@ InputFunctionCallSafe(FmgrInfo *flinfo, char *str,
16141614
return true;
16151615
}
16161616

1617+
/*
1618+
* Call a directly-named datatype input function, with non-exception
1619+
* handling of "soft" errors.
1620+
*
1621+
* This is like InputFunctionCallSafe, except that it is given a direct
1622+
* pointer to the C function to call. We assume that that function is
1623+
* strict. Also, the function cannot be one that needs to
1624+
* look at FmgrInfo, since there won't be any.
1625+
*/
1626+
bool
1627+
DirectInputFunctionCallSafe(PGFunction func, char *str,
1628+
Oid typioparam, int32 typmod,
1629+
fmNodePtr escontext,
1630+
Datum *result)
1631+
{
1632+
LOCAL_FCINFO(fcinfo, 3);
1633+
1634+
if (str == NULL)
1635+
{
1636+
*result = (Datum) 0; /* just return null result */
1637+
return true;
1638+
}
1639+
1640+
InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, escontext, NULL);
1641+
1642+
fcinfo->args[0].value = CStringGetDatum(str);
1643+
fcinfo->args[0].isnull = false;
1644+
fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
1645+
fcinfo->args[1].isnull = false;
1646+
fcinfo->args[2].value = Int32GetDatum(typmod);
1647+
fcinfo->args[2].isnull = false;
1648+
1649+
*result = (*func) (fcinfo);
1650+
1651+
/* Result value is garbage, and could be null, if an error was reported */
1652+
if (SOFT_ERROR_OCCURRED(escontext))
1653+
return false;
1654+
1655+
/* Otherwise, shouldn't get null result */
1656+
if (fcinfo->isnull)
1657+
elog(ERROR, "input function %p returned NULL", (void *) func);
1658+
1659+
return true;
1660+
}
1661+
16171662
/*
16181663
* Call a previously-looked-up datatype output function.
16191664
*

src/backend/utils/mb/mbutils.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,63 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
916916
BoolGetDatum(false));
917917
}
918918

919+
/*
920+
* Convert a single Unicode code point into a string in the server encoding.
921+
*
922+
* Same as pg_unicode_to_server(), except that we don't throw errors,
923+
* but simply return false on conversion failure.
924+
*/
925+
bool
926+
pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
927+
{
928+
unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
929+
int c_as_utf8_len;
930+
int converted_len;
931+
int server_encoding;
932+
933+
/* Fail if invalid Unicode code point */
934+
if (!is_valid_unicode_codepoint(c))
935+
return false;
936+
937+
/* Otherwise, if it's in ASCII range, conversion is trivial */
938+
if (c <= 0x7F)
939+
{
940+
s[0] = (unsigned char) c;
941+
s[1] = '\0';
942+
return true;
943+
}
944+
945+
/* If the server encoding is UTF-8, we just need to reformat the code */
946+
server_encoding = GetDatabaseEncoding();
947+
if (server_encoding == PG_UTF8)
948+
{
949+
unicode_to_utf8(c, s);
950+
s[pg_utf_mblen(s)] = '\0';
951+
return true;
952+
}
953+
954+
/* For all other cases, we must have a conversion function available */
955+
if (Utf8ToServerConvProc == NULL)
956+
return false;
957+
958+
/* Construct UTF-8 source string */
959+
unicode_to_utf8(c, c_as_utf8);
960+
c_as_utf8_len = pg_utf_mblen(c_as_utf8);
961+
c_as_utf8[c_as_utf8_len] = '\0';
962+
963+
/* Convert, but without throwing error if we can't */
964+
converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
965+
Int32GetDatum(PG_UTF8),
966+
Int32GetDatum(server_encoding),
967+
CStringGetDatum((char *) c_as_utf8),
968+
CStringGetDatum((char *) s),
969+
Int32GetDatum(c_as_utf8_len),
970+
BoolGetDatum(true)));
971+
972+
/* Conversion was successful iff it consumed the whole input */
973+
return (converted_len == c_as_utf8_len);
974+
}
975+
919976

920977
/* convert a multibyte string to a wchar */
921978
int

0 commit comments

Comments
 (0)