Skip to content

Commit 2c9b857

Browse files
committed
Fix pg_restore's misdesigned code for detecting archive file format.
Despite the clear comments pointing out that the duplicative code segments in ReadHead() and _discoverArchiveFormat() needed to be in sync, they were not: the latter did not bother to apply any of the sanity checks in the former. We'd missed noticing this partly because none of those checks would fail in scenarios we customarily test, and partly because the oversight would be masked if both segments execute, which they would in cases other than needing to autodetect the format of a non-seekable stdin source. However, in a case meeting all these requirements --- for example, trying to read a newer-than-supported archive format from non-seekable stdin --- pg_restore missed applying the version check and would likely dump core or otherwise misbehave. The whole thing is silly anyway, because there seems little reason to duplicate the logic beyond the one-line verification that the file starts with "PGDMP". There seems to have been an undocumented assumption that multiple major formats (major enough to require separate reader modules) would nonetheless share the first half-dozen fields of the custom-format header. This seems unlikely, so let's fix it by just nuking the duplicate logic in _discoverArchiveFormat(). Also get rid of the pointless attempt to seek back to the start of the file after successful autodetection. That wastes cycles and it means we have four behaviors to verify not two. Per bug #16951 from Sergey Koposov. This has been broken for decades, so back-patch to all supported versions. Discussion: https://postgr.es/m/16951-a4dd68cf0de23048@postgresql.org
1 parent 5ae0743 commit 2c9b857

File tree

3 files changed

+49
-105
lines changed

3 files changed

+49
-105
lines changed

src/bin/pg_dump/pg_backup_archiver.c

+37-93
Original file line numberDiff line numberDiff line change
@@ -2106,6 +2106,7 @@ _discoverArchiveFormat(ArchiveHandle *AH)
21062106
if (AH->lookahead)
21072107
free(AH->lookahead);
21082108

2109+
AH->readHeader = 0;
21092110
AH->lookaheadSize = 512;
21102111
AH->lookahead = pg_malloc0(512);
21112112
AH->lookaheadLen = 0;
@@ -2179,60 +2180,9 @@ _discoverArchiveFormat(ArchiveHandle *AH)
21792180

21802181
if (strncmp(sig, "PGDMP", 5) == 0)
21812182
{
2182-
int byteread;
2183-
2184-
/*
2185-
* Finish reading (most of) a custom-format header.
2186-
*
2187-
* NB: this code must agree with ReadHead().
2188-
*/
2189-
if ((byteread = fgetc(fh)) == EOF)
2190-
READ_ERROR_EXIT(fh);
2191-
2192-
AH->vmaj = byteread;
2193-
2194-
if ((byteread = fgetc(fh)) == EOF)
2195-
READ_ERROR_EXIT(fh);
2196-
2197-
AH->vmin = byteread;
2198-
2199-
/* Save these too... */
2200-
AH->lookahead[AH->lookaheadLen++] = AH->vmaj;
2201-
AH->lookahead[AH->lookaheadLen++] = AH->vmin;
2202-
2203-
/* Check header version; varies from V1.0 */
2204-
if (AH->vmaj > 1 || ((AH->vmaj == 1) && (AH->vmin > 0))) /* Version > 1.0 */
2205-
{
2206-
if ((byteread = fgetc(fh)) == EOF)
2207-
READ_ERROR_EXIT(fh);
2208-
2209-
AH->vrev = byteread;
2210-
AH->lookahead[AH->lookaheadLen++] = AH->vrev;
2211-
}
2212-
else
2213-
AH->vrev = 0;
2214-
2215-
/* Make a convenient integer <maj><min><rev>00 */
2216-
AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0;
2217-
2218-
if ((AH->intSize = fgetc(fh)) == EOF)
2219-
READ_ERROR_EXIT(fh);
2220-
AH->lookahead[AH->lookaheadLen++] = AH->intSize;
2221-
2222-
if (AH->version >= K_VERS_1_7)
2223-
{
2224-
if ((AH->offSize = fgetc(fh)) == EOF)
2225-
READ_ERROR_EXIT(fh);
2226-
AH->lookahead[AH->lookaheadLen++] = AH->offSize;
2227-
}
2228-
else
2229-
AH->offSize = AH->intSize;
2230-
2231-
if ((byteread = fgetc(fh)) == EOF)
2232-
READ_ERROR_EXIT(fh);
2233-
2234-
AH->format = byteread;
2235-
AH->lookahead[AH->lookaheadLen++] = AH->format;
2183+
/* It's custom format, stop here */
2184+
AH->format = archCustom;
2185+
AH->readHeader = 1;
22362186
}
22372187
else
22382188
{
@@ -2269,23 +2219,16 @@ _discoverArchiveFormat(ArchiveHandle *AH)
22692219
AH->format = archTar;
22702220
}
22712221

2272-
/* If we can't seek, then mark the header as read */
2273-
if (fseeko(fh, 0, SEEK_SET) != 0)
2274-
{
2275-
/*
2276-
* NOTE: Formats that use the lookahead buffer can unset this in their
2277-
* Init routine.
2278-
*/
2279-
AH->readHeader = 1;
2280-
}
2281-
else
2282-
AH->lookaheadLen = 0; /* Don't bother since we've reset the file */
2283-
2284-
/* Close the file */
2222+
/* Close the file if we opened it */
22852223
if (wantClose)
2224+
{
22862225
if (fclose(fh) != 0)
22872226
exit_horribly(modulename, "could not close input file: %s\n",
22882227
strerror(errno));
2228+
/* Forget lookahead, since we'll re-read header after re-opening */
2229+
AH->readHeader = 0;
2230+
AH->lookaheadLen = 0;
2231+
}
22892232

22902233
return AH->format;
22912234
}
@@ -3671,7 +3614,6 @@ WriteHead(ArchiveHandle *AH)
36713614
void
36723615
ReadHead(ArchiveHandle *AH)
36733616
{
3674-
char tmpMag[7];
36753617
int fmt;
36763618
struct tm crtm;
36773619

@@ -3683,44 +3625,46 @@ ReadHead(ArchiveHandle *AH)
36833625
*/
36843626
if (!AH->readHeader)
36853627
{
3628+
char tmpMag[7];
3629+
36863630
(*AH->ReadBufPtr) (AH, tmpMag, 5);
36873631

36883632
if (strncmp(tmpMag, "PGDMP", 5) != 0)
36893633
exit_horribly(modulename, "did not find magic string in file header\n");
3634+
}
36903635

3691-
AH->vmaj = (*AH->ReadBytePtr) (AH);
3692-
AH->vmin = (*AH->ReadBytePtr) (AH);
3636+
AH->vmaj = (*AH->ReadBytePtr) (AH);
3637+
AH->vmin = (*AH->ReadBytePtr) (AH);
36933638

3694-
if (AH->vmaj > 1 || ((AH->vmaj == 1) && (AH->vmin > 0))) /* Version > 1.0 */
3695-
AH->vrev = (*AH->ReadBytePtr) (AH);
3696-
else
3697-
AH->vrev = 0;
3639+
if (AH->vmaj > 1 || ((AH->vmaj == 1) && (AH->vmin > 0))) /* Version > 1.0 */
3640+
AH->vrev = (*AH->ReadBytePtr) (AH);
3641+
else
3642+
AH->vrev = 0;
36983643

3699-
AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0;
3644+
AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0;
37003645

3701-
if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX)
3702-
exit_horribly(modulename, "unsupported version (%d.%d) in file header\n",
3703-
AH->vmaj, AH->vmin);
3646+
if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX)
3647+
exit_horribly(modulename, "unsupported version (%d.%d) in file header\n",
3648+
AH->vmaj, AH->vmin);
37043649

3705-
AH->intSize = (*AH->ReadBytePtr) (AH);
3706-
if (AH->intSize > 32)
3707-
exit_horribly(modulename, "sanity check on integer size (%lu) failed\n",
3708-
(unsigned long) AH->intSize);
3650+
AH->intSize = (*AH->ReadBytePtr) (AH);
3651+
if (AH->intSize > 32)
3652+
exit_horribly(modulename, "sanity check on integer size (%lu) failed\n",
3653+
(unsigned long) AH->intSize);
37093654

3710-
if (AH->intSize > sizeof(int))
3711-
write_msg(modulename, "WARNING: archive was made on a machine with larger integers, some operations might fail\n");
3655+
if (AH->intSize > sizeof(int))
3656+
write_msg(modulename, "WARNING: archive was made on a machine with larger integers, some operations might fail\n");
37123657

3713-
if (AH->version >= K_VERS_1_7)
3714-
AH->offSize = (*AH->ReadBytePtr) (AH);
3715-
else
3716-
AH->offSize = AH->intSize;
3658+
if (AH->version >= K_VERS_1_7)
3659+
AH->offSize = (*AH->ReadBytePtr) (AH);
3660+
else
3661+
AH->offSize = AH->intSize;
37173662

3718-
fmt = (*AH->ReadBytePtr) (AH);
3663+
fmt = (*AH->ReadBytePtr) (AH);
37193664

3720-
if (AH->format != fmt)
3721-
exit_horribly(modulename, "expected format (%d) differs from format found in file (%d)\n",
3722-
AH->format, fmt);
3723-
}
3665+
if (AH->format != fmt)
3666+
exit_horribly(modulename, "expected format (%d) differs from format found in file (%d)\n",
3667+
AH->format, fmt);
37243668

37253669
if (AH->version >= K_VERS_1_2)
37263670
{

src/bin/pg_dump/pg_backup_archiver.h

+12-6
Original file line numberDiff line numberDiff line change
@@ -262,15 +262,21 @@ struct _archiveHandle
262262
time_t createDate; /* Date archive created */
263263

264264
/*
265-
* Fields used when discovering header. A format can always get the
266-
* previous read bytes from here...
265+
* Fields used when discovering archive format. For tar format, we load
266+
* the first block into the lookahead buffer, and verify that it looks
267+
* like a tar header. The tar module must then consume bytes from the
268+
* lookahead buffer before reading any more from the file. For custom
269+
* format, we load only the "PGDMP" marker into the buffer, and then set
270+
* readHeader after confirming it matches. The buffer is vestigial in
271+
* this case, as the subsequent code just checks readHeader and doesn't
272+
* examine the buffer.
267273
*/
268-
int readHeader; /* Used if file header has been read already */
274+
int readHeader; /* Set if we already read "PGDMP" marker */
269275
char *lookahead; /* Buffer used when reading header to discover
270276
* format */
271-
size_t lookaheadSize; /* Size of allocated buffer */
272-
size_t lookaheadLen; /* Length of data in lookahead */
273-
pgoff_t lookaheadPos; /* Current read position in lookahead buffer */
277+
size_t lookaheadSize; /* Allocated size of buffer */
278+
size_t lookaheadLen; /* Length of valid data in lookahead */
279+
size_t lookaheadPos; /* Current read position in lookahead buffer */
274280

275281
ArchiveEntryPtr ArchiveEntryPtr; /* Called for each metadata object */
276282
StartDataPtr StartDataPtr; /* Called when table data is about to be

src/bin/pg_dump/pg_backup_tar.c

-6
Original file line numberDiff line numberDiff line change
@@ -238,12 +238,6 @@ InitArchiveFmt_Tar(ArchiveHandle *AH)
238238

239239
ctx->hasSeek = checkSeek(ctx->tarFH);
240240

241-
/*
242-
* Forcibly unmark the header as read since we use the lookahead
243-
* buffer
244-
*/
245-
AH->readHeader = 0;
246-
247241
ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
248242
ReadHead(AH);
249243
ReadToc(AH);

0 commit comments

Comments
 (0)