Skip to content

Commit dfa6eda

Browse files
committed
Fix tar files emitted by pg_basebackup to be POSIX conformant.
Back-patch portions of commit 05b555d. There doesn't seem to be any reason not to fix pg_basebackup fully, but we can't change pg_dump's "magic" string without breaking older versions of pg_restore. Instead, just patch pg_restore to accept either version of the magic string, in hopes of avoiding compatibility problems when 9.3 comes out. I also fixed pg_dump to write the correct 2-block EOF marker, since that won't create a compatibility problem with pg_restore and it could help with some versions of tar.

Brian Weaver and Tom Lane
1 parent bc99397 commit dfa6eda

File tree

3 files changed

+64
-34
lines changed

3 files changed

+64
-34
lines changed

doc/src/sgml/protocol.sgml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,9 +1726,11 @@ The commands accepted in walsender mode are:
17261726
After the second regular result set, one or more CopyResponse results
17271727
will be sent, one for PGDATA and one for each additional tablespace other
17281728
than <literal>pg_default</> and <literal>pg_global</>. The data in
1729-
the CopyResponse results will be a tar format (using ustar00
1730-
extensions) dump of the tablespace contents. After the tar data is
1731-
complete, a final ordinary result set will be sent.
1729+
the CopyResponse results will be a tar format (following the
1730+
<quote>ustar interchange format</> specified in the POSIX 1003.1-2008
1731+
standard) dump of the tablespace contents, except that the two trailing
1732+
blocks of zeroes specified in the standard are omitted.
1733+
After the tar data is complete, a final ordinary result set will be sent.
17321734
</para>
17331735

17341736
<para>

src/backend/replication/basebackup.c

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ sendFileWithContent(const char *filename, const char *content)
554554

555555
/*
556556
* Include all files from the given directory in the output tar stream. If
557-
* 'sizeonly' is true, we just calculate a total length and return ig, without
557+
* 'sizeonly' is true, we just calculate a total length and return it, without
558558
* actually sending anything.
559559
*/
560560
static int64
@@ -744,11 +744,16 @@ _tarChecksum(char *header)
744744
int i,
745745
sum;
746746

747-
sum = 0;
747+
/*
748+
* Per POSIX, the checksum is the simple sum of all bytes in the header,
749+
* treating the bytes as unsigned, and treating the checksum field (at
750+
* offset 148) as though it contained 8 spaces.
751+
*/
752+
sum = 8 * ' '; /* presumed value for checksum field */
748753
for (i = 0; i < 512; i++)
749754
if (i < 148 || i >= 156)
750755
sum += 0xFF & header[i];
751-
return sum + 256; /* Assume 8 blanks in checksum field */
756+
return sum;
752757
}
753758

754759
/* Given the member, write the TAR header & send the file */
@@ -827,9 +832,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
827832
struct stat * statbuf)
828833
{
829834
char h[512];
830-
int lastSum = 0;
831-
int sum;
832835

836+
/*
837+
* Note: most of the fields in a tar header are not supposed to be
838+
* null-terminated. We use sprintf, which will write a null after the
839+
* required bytes; that null goes into the first byte of the next field.
840+
* This is okay as long as we fill the fields in order.
841+
*/
833842
memset(h, 0, sizeof(h));
834843

835844
/* Name 100 */
@@ -841,8 +850,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
841850
* indicated in the tar format by adding a slash at the end of the
842851
* name, the same as for regular directories.
843852
*/
844-
h[strlen(filename)] = '/';
845-
h[strlen(filename) + 1] = '\0';
853+
int flen = strlen(filename);
854+
855+
flen = Min(flen, 99);
856+
h[flen] = '/';
857+
h[flen + 1] = '\0';
846858
}
847859

848860
/* Mode 8 */
@@ -852,9 +864,9 @@ _tarWriteHeader(const char *filename, const char *linktarget,
852864
sprintf(&h[108], "%07o ", statbuf->st_uid);
853865

854866
/* Group 8 */
855-
sprintf(&h[117], "%07o ", statbuf->st_gid);
867+
sprintf(&h[116], "%07o ", statbuf->st_gid);
856868

857-
/* File size 12 - 11 digits, 1 space, no NUL */
869+
/* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
858870
if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
859871
/* Symbolic link or directory has size zero */
860872
print_val(&h[124], 0, 8, 11);
@@ -865,13 +877,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
865877
/* Mod Time 12 */
866878
sprintf(&h[136], "%011o ", (int) statbuf->st_mtime);
867879

868-
/* Checksum 8 */
869-
sprintf(&h[148], "%06o ", lastSum);
880+
/* Checksum 8 cannot be calculated until we've filled all other fields */
870881

871882
if (linktarget != NULL)
872883
{
873884
/* Type - Symbolic link */
874885
sprintf(&h[156], "2");
886+
/* Link Name 100 */
875887
sprintf(&h[157], "%.99s", linktarget);
876888
}
877889
else if (S_ISDIR(statbuf->st_mode))
@@ -881,10 +893,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
881893
/* Type - regular file */
882894
sprintf(&h[156], "0");
883895

884-
/* Link tag 100 (NULL) */
896+
/* Magic 6 */
897+
sprintf(&h[257], "ustar");
885898

886-
/* Magic 6 + Version 2 */
887-
sprintf(&h[257], "ustar00");
899+
/* Version 2 */
900+
sprintf(&h[263], "00");
888901

889902
/* User 32 */
890903
/* XXX: Do we need to care about setting correct username? */
@@ -894,17 +907,21 @@ _tarWriteHeader(const char *filename, const char *linktarget,
894907
/* XXX: Do we need to care about setting correct group name? */
895908
sprintf(&h[297], "%.31s", "postgres");
896909

897-
/* Maj Dev 8 */
898-
sprintf(&h[329], "%6o ", 0);
910+
/* Major Dev 8 */
911+
sprintf(&h[329], "%07o ", 0);
899912

900-
/* Min Dev 8 */
901-
sprintf(&h[337], "%6o ", 0);
913+
/* Minor Dev 8 */
914+
sprintf(&h[337], "%07o ", 0);
902915

903-
while ((sum = _tarChecksum(h)) != lastSum)
904-
{
905-
sprintf(&h[148], "%06o ", sum);
906-
lastSum = sum;
907-
}
916+
/* Prefix 155 - not used, leave as nulls */
917+
918+
/*
919+
* We mustn't overwrite the next field while inserting the checksum.
920+
* Fortunately, the checksum can't exceed 6 octal digits, so we just write
921+
* 6 digits, a space, and a null, which is legal per POSIX.
922+
*/
923+
sprintf(&h[148], "%06o ", _tarChecksum(h));
908924

925+
/* Now send the completed header. */
909926
pq_putmessage('d', h, 512);
910927
}

src/bin/pg_dump/pg_backup_tar.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -876,8 +876,10 @@ _CloseArchive(ArchiveHandle *AH)
876876

877877
tarClose(AH, th);
878878

879-
/* Add a block of NULLs since it's de-rigeur. */
880-
for (i = 0; i < 512; i++)
879+
/*
880+
* EOF marker for tar files is two blocks of NULLs.
881+
*/
882+
for (i = 0; i < 512 * 2; i++)
881883
{
882884
if (fputc(0, ctx->tarFH) == EOF)
883885
die_horribly(AH, modulename,
@@ -1028,11 +1030,16 @@ _tarChecksum(char *header)
10281030
int i,
10291031
sum;
10301032

1031-
sum = 0;
1033+
/*
1034+
* Per POSIX, the checksum is the simple sum of all bytes in the header,
1035+
* treating the bytes as unsigned, and treating the checksum field (at
1036+
* offset 148) as though it contained 8 spaces.
1037+
*/
1038+
sum = 8 * ' '; /* presumed value for checksum field */
10321039
for (i = 0; i < 512; i++)
10331040
if (i < 148 || i >= 156)
10341041
sum += 0xFF & header[i];
1035-
return sum + 256; /* Assume 8 blanks in checksum field */
1042+
return sum;
10361043
}
10371044

10381045
bool
@@ -1046,11 +1053,15 @@ isValidTarHeader(char *header)
10461053
if (sum != chk)
10471054
return false;
10481055

1049-
/* POSIX format */
1050-
if (strncmp(&header[257], "ustar00", 7) == 0)
1056+
/* POSIX tar format */
1057+
if (memcmp(&header[257], "ustar\0", 6) == 0 &&
1058+
memcmp(&header[263], "00", 2) == 0)
1059+
return true;
1060+
/* GNU tar format */
1061+
if (memcmp(&header[257], "ustar \0", 8) == 0)
10511062
return true;
1052-
/* older format */
1053-
if (strncmp(&header[257], "ustar ", 7) == 0)
1063+
/* not-quite-POSIX format written by pre-9.3 pg_dump */
1064+
if (memcmp(&header[257], "ustar00\0", 8) == 0)
10541065
return true;
10551066

10561067
return false;

0 commit comments

Comments
 (0)