Skip to content

Commit ea7b4e9

Browse files
committed
Add support for incrementally parsing backup manifests
This adds the infrastructure for using the new non-recursive JSON parser in processing manifests. It's important that callers make sure that the last piece of JSON handed to the incremental manifest parser contains the entire last few lines of the manifest, including the checksum. Author: Andrew Dunstan Reviewed-By: Jacob Champion Discussion: https://postgr.es/m/7b0a51d6-0d9d-7366-3a1a-f74397a02f55@dunslane.net
1 parent 3311ea8 commit ea7b4e9

File tree

2 files changed

+118
-8
lines changed

2 files changed

+118
-8
lines changed

src/common/parse_manifest.c

Lines changed: 113 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ typedef struct
9191
char *manifest_checksum;
9292
} JsonManifestParseState;
9393

94+
typedef struct JsonManifestParseIncrementalState
95+
{
96+
JsonLexContext lex;
97+
JsonSemAction sem;
98+
pg_cryptohash_ctx *manifest_ctx;
99+
} JsonManifestParseIncrementalState;
100+
94101
static JsonParseErrorType json_manifest_object_start(void *state);
95102
static JsonParseErrorType json_manifest_object_end(void *state);
96103
static JsonParseErrorType json_manifest_array_start(void *state);
@@ -104,14 +111,99 @@ static void json_manifest_finalize_system_identifier(JsonManifestParseState *par
104111
static void json_manifest_finalize_file(JsonManifestParseState *parse);
105112
static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
106113
static void verify_manifest_checksum(JsonManifestParseState *parse,
107-
char *buffer, size_t size);
114+
char *buffer, size_t size,
115+
pg_cryptohash_ctx *incr_ctx);
108116
static void json_manifest_parse_failure(JsonManifestParseContext *context,
109117
char *msg);
110118

111119
static int hexdecode_char(char c);
112120
static bool hexdecode_string(uint8 *result, char *input, int nbytes);
113121
static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
114122

123+
/*
124+
* Set up for incremental parsing of the manifest.
125+
*
126+
*/
127+
128+
JsonManifestParseIncrementalState *
129+
json_parse_manifest_incremental_init(JsonManifestParseContext *context)
130+
{
131+
JsonManifestParseIncrementalState *incstate;
132+
JsonManifestParseState *parse;
133+
pg_cryptohash_ctx *manifest_ctx;
134+
135+
incstate = palloc(sizeof(JsonManifestParseIncrementalState));
136+
parse = palloc(sizeof(JsonManifestParseState));
137+
138+
parse->context = context;
139+
parse->state = JM_EXPECT_TOPLEVEL_START;
140+
parse->saw_version_field = false;
141+
142+
makeJsonLexContextIncremental(&(incstate->lex), PG_UTF8, true);
143+
144+
incstate->sem.semstate = parse;
145+
incstate->sem.object_start = json_manifest_object_start;
146+
incstate->sem.object_end = json_manifest_object_end;
147+
incstate->sem.array_start = json_manifest_array_start;
148+
incstate->sem.array_end = json_manifest_array_end;
149+
incstate->sem.object_field_start = json_manifest_object_field_start;
150+
incstate->sem.object_field_end = NULL;
151+
incstate->sem.array_element_start = NULL;
152+
incstate->sem.array_element_end = NULL;
153+
incstate->sem.scalar = json_manifest_scalar;
154+
155+
manifest_ctx = pg_cryptohash_create(PG_SHA256);
156+
if (manifest_ctx == NULL)
157+
context->error_cb(context, "out of memory");
158+
if (pg_cryptohash_init(manifest_ctx) < 0)
159+
context->error_cb(context, "could not initialize checksum of manifest");
160+
incstate->manifest_ctx = manifest_ctx;
161+
162+
return incstate;
163+
}
164+
165+
/*
166+
* Parse the manifest in pieces.
167+
*
168+
* The caller must ensure that the final piece contains the final lines
169+
* with the complete checksum.
170+
*/
171+
172+
void
173+
json_parse_manifest_incremental_chunk(
174+
JsonManifestParseIncrementalState *incstate, char *chunk, int size,
175+
bool is_last)
176+
{
177+
JsonParseErrorType res,
178+
expected;
179+
JsonManifestParseState *parse = incstate->sem.semstate;
180+
JsonManifestParseContext *context = parse->context;
181+
182+
res = pg_parse_json_incremental(&(incstate->lex), &(incstate->sem),
183+
chunk, size, is_last);
184+
185+
expected = is_last ? JSON_SUCCESS : JSON_INCOMPLETE;
186+
187+
if (res != expected)
188+
json_manifest_parse_failure(context,
189+
json_errdetail(res, &(incstate->lex)));
190+
191+
if (is_last && parse->state != JM_EXPECT_EOF)
192+
json_manifest_parse_failure(context, "manifest ended unexpectedly");
193+
194+
if (!is_last)
195+
{
196+
if (pg_cryptohash_update(incstate->manifest_ctx,
197+
(uint8 *) chunk, size) < 0)
198+
context->error_cb(context, "could not update checksum of manifest");
199+
}
200+
else
201+
{
202+
verify_manifest_checksum(parse, chunk, size, incstate->manifest_ctx);
203+
}
204+
}
205+
206+
115207
/*
116208
* Main entrypoint to parse a JSON-format backup manifest.
117209
*
@@ -157,7 +249,7 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer,
157249
json_manifest_parse_failure(context, "manifest ended unexpectedly");
158250

159251
/* Verify the manifest checksum. */
160-
verify_manifest_checksum(&parse, buffer, size);
252+
verify_manifest_checksum(&parse, buffer, size, NULL);
161253

162254
freeJsonLexContext(lex);
163255
}
@@ -390,6 +482,8 @@ json_manifest_object_field_start(void *state, char *fname, bool isnull)
390482
break;
391483
}
392484

485+
pfree(fname);
486+
393487
return JSON_SUCCESS;
394488
}
395489

@@ -698,10 +792,14 @@ json_manifest_finalize_wal_range(JsonManifestParseState *parse)
698792
* The last line of the manifest file is excluded from the manifest checksum,
699793
* because the last line is expected to contain the checksum that covers
700794
* the rest of the file.
795+
*
796+
* For an incremental parse, this will just be called on the last chunk of the
797+
* manifest, with the cryptohash context passed in. For a non-incremental
798+
* parse incr_ctx will be NULL.
701799
*/
702800
static void
703801
verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
704-
size_t size)
802+
size_t size, pg_cryptohash_ctx *incr_ctx)
705803
{
706804
JsonManifestParseContext *context = parse->context;
707805
size_t i;
@@ -736,11 +834,18 @@ verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
736834
"last line not newline-terminated");
737835

738836
/* Checksum the rest. */
739-
manifest_ctx = pg_cryptohash_create(PG_SHA256);
740-
if (manifest_ctx == NULL)
741-
context->error_cb(context, "out of memory");
742-
if (pg_cryptohash_init(manifest_ctx) < 0)
743-
context->error_cb(context, "could not initialize checksum of manifest");
837+
if (incr_ctx == NULL)
838+
{
839+
manifest_ctx = pg_cryptohash_create(PG_SHA256);
840+
if (manifest_ctx == NULL)
841+
context->error_cb(context, "out of memory");
842+
if (pg_cryptohash_init(manifest_ctx) < 0)
843+
context->error_cb(context, "could not initialize checksum of manifest");
844+
}
845+
else
846+
{
847+
manifest_ctx = incr_ctx;
848+
}
744849
if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0)
745850
context->error_cb(context, "could not update checksum of manifest");
746851
if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,

src/include/common/parse_manifest.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
struct JsonManifestParseContext;
2222
typedef struct JsonManifestParseContext JsonManifestParseContext;
23+
typedef struct JsonManifestParseIncrementalState JsonManifestParseIncrementalState;
2324

2425
typedef void (*json_manifest_version_callback) (JsonManifestParseContext *,
2526
int manifest_version);
@@ -48,5 +49,9 @@ struct JsonManifestParseContext
4849

4950
extern void json_parse_manifest(JsonManifestParseContext *context,
5051
char *buffer, size_t size);
52+
extern JsonManifestParseIncrementalState *json_parse_manifest_incremental_init(JsonManifestParseContext *context);
53+
extern void json_parse_manifest_incremental_chunk(
54+
JsonManifestParseIncrementalState *incstate, char *chunk, int size,
55+
bool is_last);
5156

5257
#endif

0 commit comments

Comments
 (0)