Skip to content

Commit 5c32c21

Browse files
committed
jsonapi: add lexer option to keep token ownership
Commit 0785d1b adds support for libpq as a JSON client, but allocations for string tokens can still be leaked during parsing failures. This is tricky to fix for the object_field semantic callbacks: the field name must remain valid until the end of the object, but if a parsing error is encountered partway through, object_field_end() won't be invoked and the client won't get a chance to free the field name. This patch adds a flag to switch the ownership of parsed tokens to the lexer. When this is enabled, the client must make a copy of any tokens it wants to persist past the callback lifetime, but the lexer will handle necessary cleanup on failure. Backend uses of the JSON parser don't need to use this flag, since the parser's allocations will occur in a short lived memory context. A -o option has been added to test_json_parser_incremental to exercise the new setJsonLexContextOwnsTokens() API, and the test_json_parser TAP tests make use of it. (The test program now cleans up allocated memory, so that tests can be usefully run under leak sanitizers.) Author: Jacob Champion Discussion: https://postgr.es/m/CAOYmi+kb38EciwyBQOf9peApKGwraHqA7pgzBkvoUnw5BRfS1g@mail.gmail.com
1 parent 262283d commit 5c32c21

File tree

6 files changed

+173
-33
lines changed

6 files changed

+173
-33
lines changed

src/common/jsonapi.c

+94-8
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ struct JsonParserStack
161161
*/
162162
struct JsonIncrementalState
163163
{
164+
bool started;
164165
bool is_last_chunk;
165166
bool partial_completed;
166167
jsonapi_StrValType partial_token;
@@ -280,6 +281,7 @@ static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSem
280281
static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
281282
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
282283
static bool allocate_incremental_state(JsonLexContext *lex);
284+
static inline void set_fname(JsonLexContext *lex, char *fname);
283285

284286
/* the null action object used for pure validation */
285287
const JsonSemAction nullSemAction =
@@ -437,7 +439,7 @@ allocate_incremental_state(JsonLexContext *lex)
437439
*fnull;
438440

439441
lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
440-
pstack = ALLOC(sizeof(JsonParserStack));
442+
pstack = ALLOC0(sizeof(JsonParserStack));
441443
prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
442444
fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
443445
fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
@@ -464,10 +466,17 @@ allocate_incremental_state(JsonLexContext *lex)
464466
lex->pstack = pstack;
465467
lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
466468
lex->pstack->prediction = prediction;
467-
lex->pstack->pred_index = 0;
468469
lex->pstack->fnames = fnames;
469470
lex->pstack->fnull = fnull;
470471

472+
/*
473+
* fnames between 0 and lex_level must always be defined so that
474+
* freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
475+
* the rest.
476+
*/
477+
Assert(lex->lex_level == 0);
478+
lex->pstack->fnames[0] = NULL;
479+
471480
lex->incremental = true;
472481
return true;
473482
}
@@ -530,6 +539,25 @@ makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
530539
return lex;
531540
}
532541

542+
void
543+
setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
544+
{
545+
if (lex->incremental && lex->inc_state->started)
546+
{
547+
/*
548+
* Switching this flag after parsing has already started is a
549+
* programming error.
550+
*/
551+
Assert(false);
552+
return;
553+
}
554+
555+
if (owned_by_context)
556+
lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
557+
else
558+
lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
559+
}
560+
533561
static inline bool
534562
inc_lex_level(JsonLexContext *lex)
535563
{
@@ -569,12 +597,23 @@ inc_lex_level(JsonLexContext *lex)
569597
}
570598

571599
lex->lex_level += 1;
600+
601+
if (lex->incremental)
602+
{
603+
/*
604+
* Ensure freeJsonLexContext() remains safe even if no fname is
605+
* assigned at this level.
606+
*/
607+
lex->pstack->fnames[lex->lex_level] = NULL;
608+
}
609+
572610
return true;
573611
}
574612

575613
static inline void
576614
dec_lex_level(JsonLexContext *lex)
577615
{
616+
set_fname(lex, NULL); /* free the current level's fname, if needed */
578617
lex->lex_level -= 1;
579618
}
580619

@@ -608,6 +647,15 @@ have_prediction(JsonParserStack *pstack)
608647
static inline void
609648
set_fname(JsonLexContext *lex, char *fname)
610649
{
650+
if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
651+
{
652+
/*
653+
* Don't leak prior fnames. If one hasn't been assigned yet,
654+
* inc_lex_level ensured that it's NULL (and therefore safe to free).
655+
*/
656+
FREE(lex->pstack->fnames[lex->lex_level]);
657+
}
658+
611659
lex->pstack->fnames[lex->lex_level] = fname;
612660
}
613661

@@ -655,8 +703,19 @@ freeJsonLexContext(JsonLexContext *lex)
655703
jsonapi_termStringInfo(&lex->inc_state->partial_token);
656704
FREE(lex->inc_state);
657705
FREE(lex->pstack->prediction);
706+
707+
if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
708+
{
709+
int i;
710+
711+
/* Clean up any tokens that were left behind. */
712+
for (i = 0; i <= lex->lex_level; i++)
713+
FREE(lex->pstack->fnames[i]);
714+
}
715+
658716
FREE(lex->pstack->fnames);
659717
FREE(lex->pstack->fnull);
718+
FREE(lex->pstack->scalar_val);
660719
FREE(lex->pstack);
661720
}
662721

@@ -826,6 +885,7 @@ pg_parse_json_incremental(JsonLexContext *lex,
826885
lex->input = lex->token_terminator = lex->line_start = json;
827886
lex->input_length = len;
828887
lex->inc_state->is_last_chunk = is_last;
888+
lex->inc_state->started = true;
829889

830890
/* get the initial token */
831891
result = json_lex(lex);
@@ -1086,6 +1146,17 @@ pg_parse_json_incremental(JsonLexContext *lex,
10861146
if (sfunc != NULL)
10871147
{
10881148
result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
1149+
1150+
/*
1151+
* Either ownership of the token passed to the
1152+
* callback, or we need to free it now. Either
1153+
* way, clear our pointer to it so it doesn't get
1154+
* freed in the future.
1155+
*/
1156+
if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1157+
FREE(pstack->scalar_val);
1158+
pstack->scalar_val = NULL;
1159+
10891160
if (result != JSON_SUCCESS)
10901161
return result;
10911162
}
@@ -1221,11 +1292,17 @@ parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
12211292
/* consume the token */
12221293
result = json_lex(lex);
12231294
if (result != JSON_SUCCESS)
1295+
{
1296+
FREE(val);
12241297
return result;
1298+
}
12251299

1226-
/* invoke the callback */
1300+
/* invoke the callback, which may take ownership of val */
12271301
result = (*sfunc) (sem->semstate, val, tok);
12281302

1303+
if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1304+
FREE(val);
1305+
12291306
return result;
12301307
}
12311308

@@ -1238,7 +1315,7 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
12381315
* generally call a field name a "key".
12391316
*/
12401317

1241-
char *fname = NULL; /* keep compiler quiet */
1318+
char *fname = NULL;
12421319
json_ofield_action ostart = sem->object_field_start;
12431320
json_ofield_action oend = sem->object_field_end;
12441321
bool isnull;
@@ -1255,11 +1332,17 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
12551332
}
12561333
result = json_lex(lex);
12571334
if (result != JSON_SUCCESS)
1335+
{
1336+
FREE(fname);
12581337
return result;
1338+
}
12591339

12601340
result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
12611341
if (result != JSON_SUCCESS)
1342+
{
1343+
FREE(fname);
12621344
return result;
1345+
}
12631346

12641347
tok = lex_peek(lex);
12651348
isnull = tok == JSON_TOKEN_NULL;
@@ -1268,7 +1351,7 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
12681351
{
12691352
result = (*ostart) (sem->semstate, fname, isnull);
12701353
if (result != JSON_SUCCESS)
1271-
return result;
1354+
goto ofield_cleanup;
12721355
}
12731356

12741357
switch (tok)
@@ -1283,16 +1366,19 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
12831366
result = parse_scalar(lex, sem);
12841367
}
12851368
if (result != JSON_SUCCESS)
1286-
return result;
1369+
goto ofield_cleanup;
12871370

12881371
if (oend != NULL)
12891372
{
12901373
result = (*oend) (sem->semstate, fname, isnull);
12911374
if (result != JSON_SUCCESS)
1292-
return result;
1375+
goto ofield_cleanup;
12931376
}
12941377

1295-
return JSON_SUCCESS;
1378+
ofield_cleanup:
1379+
if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1380+
FREE(fname);
1381+
return result;
12961382
}
12971383

12981384
static JsonParseErrorType

src/include/common/jsonapi.h

+25-3
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,11 @@ typedef struct JsonIncrementalState JsonIncrementalState;
9292
* conjunction with token_start.
9393
*
9494
* JSONLEX_FREE_STRUCT/STRVAL are used to drive freeJsonLexContext.
95+
* JSONLEX_CTX_OWNS_TOKENS is used by setJsonLexContextOwnsTokens.
9596
*/
9697
#define JSONLEX_FREE_STRUCT (1 << 0)
9798
#define JSONLEX_FREE_STRVAL (1 << 1)
99+
#define JSONLEX_CTX_OWNS_TOKENS (1 << 2)
98100
typedef struct JsonLexContext
99101
{
100102
const char *input;
@@ -130,9 +132,10 @@ typedef JsonParseErrorType (*json_scalar_action) (void *state, char *token, Json
130132
* to doing a pure parse with no side-effects, and is therefore exactly
131133
* what the json input routines do.
132134
*
133-
* The 'fname' and 'token' strings passed to these actions are palloc'd.
134-
* They are not free'd or used further by the parser, so the action function
135-
* is free to do what it wishes with them.
135+
* By default, the 'fname' and 'token' strings passed to these actions are
136+
* palloc'd. They are not free'd or used further by the parser, so the action
137+
* function is free to do what it wishes with them. This behavior may be
138+
* modified by setJsonLexContextOwnsTokens().
136139
*
137140
* All action functions return JsonParseErrorType. If the result isn't
138141
* JSON_SUCCESS, the parse is abandoned and that error code is returned.
@@ -216,6 +219,25 @@ extern JsonLexContext *makeJsonLexContextIncremental(JsonLexContext *lex,
216219
int encoding,
217220
bool need_escapes);
218221

222+
/*
223+
* Sets whether tokens passed to semantic action callbacks are owned by the
224+
* context (in which case, the callback must duplicate the tokens for long-term
225+
* storage) or by the callback (in which case, the callback must explicitly
226+
* free tokens to avoid leaks).
227+
*
228+
* By default, this setting is false: the callback owns the tokens that are
229+
* passed to it (and if parsing fails between the two object-field callbacks,
230+
* the field name token will likely leak). If set to true, tokens will be freed
231+
* by the lexer after the callback completes.
232+
*
233+
* Setting this to true is important for long-lived clients (such as libpq)
234+
* that must not leak memory during a parse failure. For a server backend using
235+
* memory contexts, or a client application which will exit on parse failure,
236+
* this setting is less critical.
237+
*/
238+
extern void setJsonLexContextOwnsTokens(JsonLexContext *lex,
239+
bool owned_by_context);
240+
219241
extern void freeJsonLexContext(JsonLexContext *lex);
220242

221243
/* lex one token */

src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl

+8-5
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,24 @@
1313

1414
my $test_file = "$FindBin::RealBin/../tiny.json";
1515

16-
my @exes =
17-
("test_json_parser_incremental", "test_json_parser_incremental_shlib");
16+
my @exes = (
17+
[ "test_json_parser_incremental", ],
18+
[ "test_json_parser_incremental", "-o", ],
19+
[ "test_json_parser_incremental_shlib", ],
20+
[ "test_json_parser_incremental_shlib", "-o", ]);
1821

1922
foreach my $exe (@exes)
2023
{
21-
note "testing executable $exe";
24+
note "testing executable @$exe";
2225

2326
# Test the usage error
24-
my ($stdout, $stderr) = run_command([ $exe, "-c", 10 ]);
27+
my ($stdout, $stderr) = run_command([ @$exe, "-c", 10 ]);
2528
like($stderr, qr/Usage:/, 'error message if not enough arguments');
2629

2730
# Test that we get success for small chunk sizes from 64 down to 1.
2831
for (my $size = 64; $size > 0; $size--)
2932
{
30-
($stdout, $stderr) = run_command([ $exe, "-c", $size, $test_file ]);
33+
($stdout, $stderr) = run_command([ @$exe, "-c", $size, $test_file ]);
3134

3235
like($stdout, qr/SUCCESS/, "chunk size $size: test succeeds");
3336
is($stderr, "", "chunk size $size: no error output");

src/test/modules/test_json_parser/t/002_inline.pl

+9-6
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
use File::Temp qw(tempfile);
1414

1515
my $dir = PostgreSQL::Test::Utils::tempdir;
16-
my $exe;
16+
my @exe;
1717

1818
sub test
1919
{
@@ -35,7 +35,7 @@ sub test
3535

3636
foreach my $size (reverse(1 .. $chunk))
3737
{
38-
my ($stdout, $stderr) = run_command([ $exe, "-c", $size, $fname ]);
38+
my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname ]);
3939

4040
if (defined($params{error}))
4141
{
@@ -53,13 +53,16 @@ sub test
5353
}
5454
}
5555

56-
my @exes =
57-
("test_json_parser_incremental", "test_json_parser_incremental_shlib");
56+
my @exes = (
57+
[ "test_json_parser_incremental", ],
58+
[ "test_json_parser_incremental", "-o", ],
59+
[ "test_json_parser_incremental_shlib", ],
60+
[ "test_json_parser_incremental_shlib", "-o", ]);
5861

5962
foreach (@exes)
6063
{
61-
$exe = $_;
62-
note "testing executable $exe";
64+
@exe = @$_;
65+
note "testing executable @exe";
6366

6467
test("number", "12345");
6568
test("string", '"hello"');

src/test/modules/test_json_parser/t/003_test_semantic.pl

+7-4
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@
1616
my $test_file = "$FindBin::RealBin/../tiny.json";
1717
my $test_out = "$FindBin::RealBin/../tiny.out";
1818

19-
my @exes =
20-
("test_json_parser_incremental", "test_json_parser_incremental_shlib");
19+
my @exes = (
20+
[ "test_json_parser_incremental", ],
21+
[ "test_json_parser_incremental", "-o", ],
22+
[ "test_json_parser_incremental_shlib", ],
23+
[ "test_json_parser_incremental_shlib", "-o", ]);
2124

2225
foreach my $exe (@exes)
2326
{
24-
note "testing executable $exe";
27+
note "testing executable @$exe";
2528

26-
my ($stdout, $stderr) = run_command([ $exe, "-s", $test_file ]);
29+
my ($stdout, $stderr) = run_command([ @$exe, "-s", $test_file ]);
2730

2831
is($stderr, "", "no error output");
2932

0 commit comments

Comments
 (0)