Skip to content

Commit 25fe401

Browse files
committed
Fix pg_restore's misdesigned code for detecting archive file format.
Despite the clear comments pointing out that the duplicative code segments in ReadHead() and _discoverArchiveFormat() needed to be in sync, they were not: the latter did not bother to apply any of the sanity checks in the former. We'd missed noticing this partly because none of those checks would fail in scenarios we customarily test, and partly because the oversight would be masked if both segments execute, which they would in cases other than needing to autodetect the format of a non-seekable stdin source. However, in a case meeting all these requirements --- for example, trying to read a newer-than-supported archive format from non-seekable stdin --- pg_restore missed applying the version check and would likely dump core or otherwise misbehave. The whole thing is silly anyway, because there seems little reason to duplicate the logic beyond the one-line verification that the file starts with "PGDMP". There seems to have been an undocumented assumption that multiple major formats (major enough to require separate reader modules) would nonetheless share the first half-dozen fields of the custom-format header. This seems unlikely, so let's fix it by just nuking the duplicate logic in _discoverArchiveFormat(). Also get rid of the pointless attempt to seek back to the start of the file after successful autodetection. That wastes cycles and it means we have four behaviors to verify not two. Per bug #16951 from Sergey Koposov. This has been broken for decades, so back-patch to all supported versions. Discussion: https://postgr.es/m/16951-a4dd68cf0de23048@postgresql.org
1 parent dd5d04e commit 25fe401

File tree

3 files changed

+51
-110
lines changed

3 files changed

+51
-110
lines changed

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 39 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -2104,6 +2104,7 @@ _discoverArchiveFormat(ArchiveHandle *AH)
21042104
if (AH->lookahead)
21052105
free(AH->lookahead);
21062106

2107+
AH->readHeader = 0;
21072108
AH->lookaheadSize = 512;
21082109
AH->lookahead = pg_malloc0(512);
21092110
AH->lookaheadLen = 0;
@@ -2177,62 +2178,9 @@ _discoverArchiveFormat(ArchiveHandle *AH)
21772178

21782179
if (strncmp(sig, "PGDMP", 5) == 0)
21792180
{
2180-
int byteread;
2181-
char vmaj,
2182-
vmin,
2183-
vrev;
2184-
2185-
/*
2186-
* Finish reading (most of) a custom-format header.
2187-
*
2188-
* NB: this code must agree with ReadHead().
2189-
*/
2190-
if ((byteread = fgetc(fh)) == EOF)
2191-
READ_ERROR_EXIT(fh);
2192-
2193-
vmaj = byteread;
2194-
2195-
if ((byteread = fgetc(fh)) == EOF)
2196-
READ_ERROR_EXIT(fh);
2197-
2198-
vmin = byteread;
2199-
2200-
/* Save these too... */
2201-
AH->lookahead[AH->lookaheadLen++] = vmaj;
2202-
AH->lookahead[AH->lookaheadLen++] = vmin;
2203-
2204-
/* Check header version; varies from V1.0 */
2205-
if (vmaj > 1 || (vmaj == 1 && vmin > 0)) /* Version > 1.0 */
2206-
{
2207-
if ((byteread = fgetc(fh)) == EOF)
2208-
READ_ERROR_EXIT(fh);
2209-
2210-
vrev = byteread;
2211-
AH->lookahead[AH->lookaheadLen++] = vrev;
2212-
}
2213-
else
2214-
vrev = 0;
2215-
2216-
AH->version = MAKE_ARCHIVE_VERSION(vmaj, vmin, vrev);
2217-
2218-
if ((AH->intSize = fgetc(fh)) == EOF)
2219-
READ_ERROR_EXIT(fh);
2220-
AH->lookahead[AH->lookaheadLen++] = AH->intSize;
2221-
2222-
if (AH->version >= K_VERS_1_7)
2223-
{
2224-
if ((AH->offSize = fgetc(fh)) == EOF)
2225-
READ_ERROR_EXIT(fh);
2226-
AH->lookahead[AH->lookaheadLen++] = AH->offSize;
2227-
}
2228-
else
2229-
AH->offSize = AH->intSize;
2230-
2231-
if ((byteread = fgetc(fh)) == EOF)
2232-
READ_ERROR_EXIT(fh);
2233-
2234-
AH->format = byteread;
2235-
AH->lookahead[AH->lookaheadLen++] = AH->format;
2181+
/* It's custom format, stop here */
2182+
AH->format = archCustom;
2183+
AH->readHeader = 1;
22362184
}
22372185
else
22382186
{
@@ -2269,23 +2217,16 @@ _discoverArchiveFormat(ArchiveHandle *AH)
22692217
AH->format = archTar;
22702218
}
22712219

2272-
/* If we can't seek, then mark the header as read */
2273-
if (fseeko(fh, 0, SEEK_SET) != 0)
2274-
{
2275-
/*
2276-
* NOTE: Formats that use the lookahead buffer can unset this in their
2277-
* Init routine.
2278-
*/
2279-
AH->readHeader = 1;
2280-
}
2281-
else
2282-
AH->lookaheadLen = 0; /* Don't bother since we've reset the file */
2283-
2284-
/* Close the file */
2220+
/* Close the file if we opened it */
22852221
if (wantClose)
2222+
{
22862223
if (fclose(fh) != 0)
22872224
exit_horribly(modulename, "could not close input file: %s\n",
22882225
strerror(errno));
2226+
/* Forget lookahead, since we'll re-read header after re-opening */
2227+
AH->readHeader = 0;
2228+
AH->lookaheadLen = 0;
2229+
}
22892230

22902231
return AH->format;
22912232
}
@@ -3778,7 +3719,9 @@ WriteHead(ArchiveHandle *AH)
37783719
void
37793720
ReadHead(ArchiveHandle *AH)
37803721
{
3781-
char tmpMag[7];
3722+
char vmaj,
3723+
vmin,
3724+
vrev;
37823725
int fmt;
37833726
struct tm crtm;
37843727

@@ -3790,48 +3733,46 @@ ReadHead(ArchiveHandle *AH)
37903733
*/
37913734
if (!AH->readHeader)
37923735
{
3793-
char vmaj,
3794-
vmin,
3795-
vrev;
3736+
char tmpMag[7];
37963737

37973738
AH->ReadBufPtr(AH, tmpMag, 5);
37983739

37993740
if (strncmp(tmpMag, "PGDMP", 5) != 0)
38003741
exit_horribly(modulename, "did not find magic string in file header\n");
3742+
}
38013743

3802-
vmaj = AH->ReadBytePtr(AH);
3803-
vmin = AH->ReadBytePtr(AH);
3744+
vmaj = AH->ReadBytePtr(AH);
3745+
vmin = AH->ReadBytePtr(AH);
38043746

3805-
if (vmaj > 1 || (vmaj == 1 && vmin > 0)) /* Version > 1.0 */
3806-
vrev = AH->ReadBytePtr(AH);
3807-
else
3808-
vrev = 0;
3747+
if (vmaj > 1 || (vmaj == 1 && vmin > 0)) /* Version > 1.0 */
3748+
vrev = AH->ReadBytePtr(AH);
3749+
else
3750+
vrev = 0;
38093751

3810-
AH->version = MAKE_ARCHIVE_VERSION(vmaj, vmin, vrev);
3752+
AH->version = MAKE_ARCHIVE_VERSION(vmaj, vmin, vrev);
38113753

3812-
if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX)
3813-
exit_horribly(modulename, "unsupported version (%d.%d) in file header\n",
3814-
vmaj, vmin);
3754+
if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX)
3755+
exit_horribly(modulename, "unsupported version (%d.%d) in file header\n",
3756+
vmaj, vmin);
38153757

3816-
AH->intSize = AH->ReadBytePtr(AH);
3817-
if (AH->intSize > 32)
3818-
exit_horribly(modulename, "sanity check on integer size (%lu) failed\n",
3819-
(unsigned long) AH->intSize);
3758+
AH->intSize = AH->ReadBytePtr(AH);
3759+
if (AH->intSize > 32)
3760+
exit_horribly(modulename, "sanity check on integer size (%lu) failed\n",
3761+
(unsigned long) AH->intSize);
38203762

3821-
if (AH->intSize > sizeof(int))
3822-
write_msg(modulename, "WARNING: archive was made on a machine with larger integers, some operations might fail\n");
3763+
if (AH->intSize > sizeof(int))
3764+
write_msg(modulename, "WARNING: archive was made on a machine with larger integers, some operations might fail\n");
38233765

3824-
if (AH->version >= K_VERS_1_7)
3825-
AH->offSize = AH->ReadBytePtr(AH);
3826-
else
3827-
AH->offSize = AH->intSize;
3766+
if (AH->version >= K_VERS_1_7)
3767+
AH->offSize = AH->ReadBytePtr(AH);
3768+
else
3769+
AH->offSize = AH->intSize;
38283770

3829-
fmt = AH->ReadBytePtr(AH);
3771+
fmt = AH->ReadBytePtr(AH);
38303772

3831-
if (AH->format != fmt)
3832-
exit_horribly(modulename, "expected format (%d) differs from format found in file (%d)\n",
3833-
AH->format, fmt);
3834-
}
3773+
if (AH->format != fmt)
3774+
exit_horribly(modulename, "expected format (%d) differs from format found in file (%d)\n",
3775+
AH->format, fmt);
38353776

38363777
if (AH->version >= K_VERS_1_2)
38373778
{

src/bin/pg_dump/pg_backup_archiver.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -262,15 +262,21 @@ struct _archiveHandle
262262
time_t createDate; /* Date archive created */
263263

264264
/*
265-
* Fields used when discovering header. A format can always get the
266-
* previous read bytes from here...
265+
* Fields used when discovering archive format. For tar format, we load
266+
* the first block into the lookahead buffer, and verify that it looks
267+
* like a tar header. The tar module must then consume bytes from the
268+
* lookahead buffer before reading any more from the file. For custom
269+
* format, we load only the "PGDMP" marker into the buffer, and then set
270+
* readHeader after confirming it matches. The buffer is vestigial in
271+
* this case, as the subsequent code just checks readHeader and doesn't
272+
* examine the buffer.
267273
*/
268-
int readHeader; /* Used if file header has been read already */
274+
int readHeader; /* Set if we already read "PGDMP" marker */
269275
char *lookahead; /* Buffer used when reading header to discover
270276
* format */
271-
size_t lookaheadSize; /* Size of allocated buffer */
272-
size_t lookaheadLen; /* Length of data in lookahead */
273-
pgoff_t lookaheadPos; /* Current read position in lookahead buffer */
277+
size_t lookaheadSize; /* Allocated size of buffer */
278+
size_t lookaheadLen; /* Length of valid data in lookahead */
279+
size_t lookaheadPos; /* Current read position in lookahead buffer */
274280

275281
ArchiveEntryPtrType ArchiveEntryPtr; /* Called for each metadata object */
276282
StartDataPtrType StartDataPtr; /* Called when table data is about to be

src/bin/pg_dump/pg_backup_tar.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -236,12 +236,6 @@ InitArchiveFmt_Tar(ArchiveHandle *AH)
236236

237237
ctx->hasSeek = checkSeek(ctx->tarFH);
238238

239-
/*
240-
* Forcibly unmark the header as read since we use the lookahead
241-
* buffer
242-
*/
243-
AH->readHeader = 0;
244-
245239
ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
246240
ReadHead(AH);
247241
ReadToc(AH);

0 commit comments

Comments
 (0)