Skip to content

Commit 0e61852

Browse files
committed
Adopt the GNU convention for handling tar-archive members exceeding 8GB.
The POSIX standard for tar headers requires archive member sizes to be printed in octal with at most 11 digits, limiting the representable file size to 8GB. However, GNU tar and apparently most other modern tars support a convention in which oversized values can be stored in base-256, allowing any practical file to be a tar member. Adopt this convention to remove two limitations:

* pg_dump with -Ft output format failed if the contents of any one table exceeded 8GB.

* pg_basebackup failed if the data directory contained any file exceeding 8GB. (This would be a fatal problem for installations configured with a table segment size of 8GB or more, and it has also been seen to fail when large core dump files exist in the data directory.)

File sizes under 8GB are still printed in octal, so that no compatibility issues are created except in cases that would have failed entirely before.

In addition, this patch fixes several bugs in the same area:

* In 9.3 and later, we'd defined tarCreateHeader's file-size argument as size_t, which meant that on 32-bit machines it would write a corrupt tar header for file sizes between 4GB and 8GB, even though no error was raised. This broke both "pg_dump -Ft" and pg_basebackup for such cases.

* pg_restore from a tar archive would fail on tables of size between 4GB and 8GB, on machines where either "size_t" or "unsigned long" is 32 bits. This happened even with an archive file not affected by the previous bug.

* pg_basebackup would fail if there were files of size between 4GB and 8GB, even on 64-bit machines.

* In 9.3 and later, "pg_basebackup -Ft" failed entirely, for any file size, on 64-bit big-endian machines.

In view of these potential data-loss bugs, back-patch to all supported branches, even though removal of the documented 8GB limit might otherwise be considered a new feature rather than a bug fix.
1 parent 64349f1 commit 0e61852

File tree

6 files changed

+124
-115
lines changed

6 files changed

+124
-115
lines changed

doc/src/sgml/ref/pg_dump.sgml

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,12 @@ PostgreSQL documentation
265265
<listitem>
266266
<para>
267267
Output a <command>tar</command>-format archive suitable for input
268-
into <application>pg_restore</application>. The tar-format is
269-
compatible with the directory-format; extracting a tar-format
268+
into <application>pg_restore</application>. The tar format is
269+
compatible with the directory format: extracting a tar-format
270270
archive produces a valid directory-format archive.
271-
However, the tar-format does not support compression and has a
272-
limit of 8 GB on the size of individual tables. Also, the relative
273-
order of table data items cannot be changed during restore.
271+
However, the tar format does not support compression. Also, when
272+
using tar format the relative order of table data items cannot be
273+
changed during restore.
274274
</para>
275275
</listitem>
276276
</varlistentry>
@@ -1075,15 +1075,6 @@ CREATE DATABASE foo WITH TEMPLATE template0;
10751075
catalogs might be left in the wrong state.
10761076
</para>
10771077

1078-
<para>
1079-
Members of tar archives are limited to a size less than 8 GB.
1080-
(This is an inherent limitation of the tar file format.) Therefore
1081-
this format cannot be used if the textual representation of any one table
1082-
exceeds that size. The total size of a tar archive and any of the
1083-
other output formats is not limited, except possibly by the
1084-
operating system.
1085-
</para>
1086-
10871078
<para>
10881079
The dump file produced by <application>pg_dump</application>
10891080
does not contain the statistics used by the optimizer to make

src/backend/replication/basebackup.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,13 +1035,6 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
10351035
*/
10361036

10371037

1038-
/*
1039-
* Maximum file size for a tar member: The limit inherent in the
1040-
* format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed
1041-
* what we can represent in pgoff_t.
1042-
*/
1043-
#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1)
1044-
10451038
/*
10461039
* Given the member, write the TAR header & send the file.
10471040
*
@@ -1070,15 +1063,6 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
10701063
errmsg("could not open file \"%s\": %m", readfilename)));
10711064
}
10721065

1073-
/*
1074-
* Some compilers will throw a warning knowing this test can never be true
1075-
* because pgoff_t can't exceed the compared maximum on their platform.
1076-
*/
1077-
if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN)
1078-
ereport(ERROR,
1079-
(errmsg("archive member \"%s\" too large for tar format",
1080-
tarfilename)));
1081-
10821066
_tarWriteHeader(tarfilename, NULL, statbuf);
10831067

10841068
while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)

src/bin/pg_basebackup/pg_basebackup.c

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
568568
bool in_tarhdr = true;
569569
bool skip_file = false;
570570
size_t tarhdrsz = 0;
571-
size_t filesz = 0;
571+
pgoff_t filesz = 0;
572572

573573
#ifdef HAVE_LIBZ
574574
gzFile ztarfile = NULL;
@@ -833,7 +833,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
833833

834834
skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0);
835835

836-
sscanf(&tarhdr[124], "%11o", (unsigned int *) &filesz);
836+
filesz = read_tar_number(&tarhdr[124], 12);
837837

838838
padding = ((filesz + 511) & ~511) - filesz;
839839
filesz += padding;
@@ -908,7 +908,7 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
908908
{
909909
char current_path[MAXPGPATH];
910910
char filename[MAXPGPATH];
911-
int current_len_left;
911+
pgoff_t current_len_left = 0;
912912
int current_padding = 0;
913913
bool basetablespace = PQgetisnull(res, rownum, 0);
914914
char *copybuf = NULL;
@@ -974,20 +974,10 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
974974
}
975975
totaldone += 512;
976976

977-
if (sscanf(copybuf + 124, "%11o", &current_len_left) != 1)
978-
{
979-
fprintf(stderr, _("%s: could not parse file size\n"),
980-
progname);
981-
disconnect_and_exit(1);
982-
}
977+
current_len_left = read_tar_number(&copybuf[124], 12);
983978

984979
/* Set permissions on the file */
985-
if (sscanf(&copybuf[100], "%07o ", &filemode) != 1)
986-
{
987-
fprintf(stderr, _("%s: could not parse file mode\n"),
988-
progname);
989-
disconnect_and_exit(1);
990-
}
980+
filemode = read_tar_number(&copybuf[100], 8);
991981

992982
/*
993983
* All files are padded up to 512 bytes

src/bin/pg_dump/pg_backup_tar.c

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,6 @@ typedef struct
7979
ArchiveHandle *AH;
8080
} TAR_MEMBER;
8181

82-
/*
83-
* Maximum file size for a tar member: The limit inherent in the
84-
* format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed
85-
* what we can represent in pgoff_t.
86-
*/
87-
#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1)
88-
8982
typedef struct
9083
{
9184
int hasSeek;
@@ -1036,7 +1029,7 @@ isValidTarHeader(char *header)
10361029
int sum;
10371030
int chk = tarChecksum(header);
10381031

1039-
sscanf(&header[148], "%8o", &sum);
1032+
sum = read_tar_number(&header[148], 8);
10401033

10411034
if (sum != chk)
10421035
return false;
@@ -1075,13 +1068,6 @@ _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
10751068
th->fileLen = ftello(tmp);
10761069
fseeko(tmp, 0, SEEK_SET);
10771070

1078-
/*
1079-
* Some compilers will throw a warning knowing this test can never be true
1080-
* because pgoff_t can't exceed the compared maximum on their platform.
1081-
*/
1082-
if (th->fileLen > MAX_TAR_MEMBER_FILELEN)
1083-
exit_horribly(modulename, "archive member too large for tar format\n");
1084-
10851071
_tarWriteHeader(th);
10861072

10871073
while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
@@ -1207,11 +1193,10 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
12071193
{
12081194
lclContext *ctx = (lclContext *) AH->formatData;
12091195
char h[512];
1210-
char tag[100];
1196+
char tag[100 + 1];
12111197
int sum,
12121198
chk;
1213-
size_t len;
1214-
unsigned long ullen;
1199+
pgoff_t len;
12151200
pgoff_t hPos;
12161201
bool gotBlock = false;
12171202

@@ -1248,7 +1233,7 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
12481233

12491234
/* Calc checksum */
12501235
chk = tarChecksum(h);
1251-
sscanf(&h[148], "%8o", &sum);
1236+
sum = read_tar_number(&h[148], 8);
12521237

12531238
/*
12541239
* If the checksum failed, see if it is a null block. If so, silently
@@ -1271,27 +1256,31 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
12711256
}
12721257
}
12731258

1274-
sscanf(&h[0], "%99s", tag);
1275-
sscanf(&h[124], "%12lo", &ullen);
1276-
len = (size_t) ullen;
1259+
/* Name field is 100 bytes, might not be null-terminated */
1260+
strlcpy(tag, &h[0], 100 + 1);
1261+
1262+
len = read_tar_number(&h[124], 12);
12771263

12781264
{
1279-
char buf[100];
1265+
char posbuf[32];
1266+
char lenbuf[32];
12801267

1281-
snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) hPos);
1282-
ahlog(AH, 3, "TOC Entry %s at %s (length %lu, checksum %d)\n",
1283-
tag, buf, (unsigned long) len, sum);
1268+
snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
1269+
snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
1270+
ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n",
1271+
tag, posbuf, lenbuf, sum);
12841272
}
12851273

12861274
if (chk != sum)
12871275
{
1288-
char buf[100];
1276+
char posbuf[32];
12891277

1290-
snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ftello(ctx->tarFH));
1278+
snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
1279+
(uint64) ftello(ctx->tarFH));
12911280
exit_horribly(modulename,
12921281
"corrupt tar header found in %s "
12931282
"(expected %d, computed %d) file position %s\n",
1294-
tag, sum, chk, buf);
1283+
tag, sum, chk, posbuf);
12951284
}
12961285

12971286
th->targetFile = pg_strdup(tag);
@@ -1306,7 +1295,8 @@ _tarWriteHeader(TAR_MEMBER *th)
13061295
{
13071296
char h[512];
13081297

1309-
tarCreateHeader(h, th->targetFile, NULL, th->fileLen, 0600, 04000, 02000, time(NULL));
1298+
tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
1299+
0600, 04000, 02000, time(NULL));
13101300

13111301
/* Now write the completed header. */
13121302
if (fwrite(h, 1, 512, th->tarFH) != 512)

src/include/pgtar.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,7 @@
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
14-
extern void tarCreateHeader(char *h, const char *filename, const char *linktarget, size_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime);
14+
extern void tarCreateHeader(char *h, const char *filename, const char *linktarget,
15+
pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime);
16+
extern uint64 read_tar_number(const char *s, int len);
1517
extern int tarChecksum(char *header);

0 commit comments

Comments
 (0)