Skip to content

Commit 900d77f

Browse files
committed
Fix tar files emitted by pg_basebackup to be POSIX conformant.
Back-patch portions of commit 05b555d. There doesn't seem to be any reason not to fix pg_basebackup fully, but we can't change pg_dump's "magic" string without breaking older versions of pg_restore. Instead, just patch pg_restore to accept either version of the magic string, in hopes of avoiding compatibility problems when 9.3 comes out. I also fixed pg_dump to write the correct 2-block EOF marker, since that won't create a compatibility problem with pg_restore and it could help with some versions of tar. Brian Weaver and Tom Lane
1 parent f2366e5 commit 900d77f

File tree

3 files changed

+64
-34
lines changed

3 files changed

+64
-34
lines changed

doc/src/sgml/protocol.sgml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1759,9 +1759,11 @@ The commands accepted in walsender mode are:
17591759
After the second regular result set, one or more CopyResponse results
17601760
will be sent, one for PGDATA and one for each additional tablespace other
17611761
than <literal>pg_default</> and <literal>pg_global</>. The data in
1762-
the CopyResponse results will be a tar format (using ustar00
1763-
extensions) dump of the tablespace contents. After the tar data is
1764-
complete, a final ordinary result set will be sent.
1762+
the CopyResponse results will be a tar format (following the
1763+
<quote>ustar interchange format</> specified in the POSIX 1003.1-2008
1764+
standard) dump of the tablespace contents, except that the two trailing
1765+
blocks of zeroes specified in the standard are omitted.
1766+
After the tar data is complete, a final ordinary result set will be sent.
17651767
</para>
17661768

17671769
<para>

src/backend/replication/basebackup.c

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ sendFileWithContent(const char *filename, const char *content)
572572

573573
/*
574574
* Include all files from the given directory in the output tar stream. If
575-
* 'sizeonly' is true, we just calculate a total length and return ig, without
575+
* 'sizeonly' is true, we just calculate a total length and return it, without
576576
* actually sending anything.
577577
*/
578578
static int64
@@ -767,11 +767,16 @@ _tarChecksum(char *header)
767767
int i,
768768
sum;
769769

770-
sum = 0;
770+
/*
771+
* Per POSIX, the checksum is the simple sum of all bytes in the header,
772+
* treating the bytes as unsigned, and treating the checksum field (at
773+
* offset 148) as though it contained 8 spaces.
774+
*/
775+
sum = 8 * ' '; /* presumed value for checksum field */
771776
for (i = 0; i < 512; i++)
772777
if (i < 148 || i >= 156)
773778
sum += 0xFF & header[i];
774-
return sum + 256; /* Assume 8 blanks in checksum field */
779+
return sum;
775780
}
776781

777782
/* Given the member, write the TAR header & send the file */
@@ -850,9 +855,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
850855
struct stat * statbuf)
851856
{
852857
char h[512];
853-
int lastSum = 0;
854-
int sum;
855858

859+
/*
860+
* Note: most of the fields in a tar header are not supposed to be
861+
* null-terminated. We use sprintf, which will write a null after the
862+
* required bytes; that null goes into the first byte of the next field.
863+
* This is okay as long as we fill the fields in order.
864+
*/
856865
memset(h, 0, sizeof(h));
857866

858867
/* Name 100 */
@@ -864,8 +873,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
864873
* indicated in the tar format by adding a slash at the end of the
865874
* name, the same as for regular directories.
866875
*/
867-
h[strlen(filename)] = '/';
868-
h[strlen(filename) + 1] = '\0';
876+
int flen = strlen(filename);
877+
878+
flen = Min(flen, 99);
879+
h[flen] = '/';
880+
h[flen + 1] = '\0';
869881
}
870882

871883
/* Mode 8 */
@@ -875,9 +887,9 @@ _tarWriteHeader(const char *filename, const char *linktarget,
875887
sprintf(&h[108], "%07o ", statbuf->st_uid);
876888

877889
/* Group 8 */
878-
sprintf(&h[117], "%07o ", statbuf->st_gid);
890+
sprintf(&h[116], "%07o ", statbuf->st_gid);
879891

880-
/* File size 12 - 11 digits, 1 space, no NUL */
892+
/* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
881893
if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
882894
/* Symbolic link or directory has size zero */
883895
print_val(&h[124], 0, 8, 11);
@@ -888,13 +900,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
888900
/* Mod Time 12 */
889901
sprintf(&h[136], "%011o ", (int) statbuf->st_mtime);
890902

891-
/* Checksum 8 */
892-
sprintf(&h[148], "%06o ", lastSum);
903+
/* Checksum 8 cannot be calculated until we've filled all other fields */
893904

894905
if (linktarget != NULL)
895906
{
896907
/* Type - Symbolic link */
897908
sprintf(&h[156], "2");
909+
/* Link Name 100 */
898910
sprintf(&h[157], "%.99s", linktarget);
899911
}
900912
else if (S_ISDIR(statbuf->st_mode))
@@ -904,10 +916,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
904916
/* Type - regular file */
905917
sprintf(&h[156], "0");
906918

907-
/* Link tag 100 (NULL) */
919+
/* Magic 6 */
920+
sprintf(&h[257], "ustar");
908921

909-
/* Magic 6 + Version 2 */
910-
sprintf(&h[257], "ustar00");
922+
/* Version 2 */
923+
sprintf(&h[263], "00");
911924

912925
/* User 32 */
913926
/* XXX: Do we need to care about setting correct username? */
@@ -917,17 +930,21 @@ _tarWriteHeader(const char *filename, const char *linktarget,
917930
/* XXX: Do we need to care about setting correct group name? */
918931
sprintf(&h[297], "%.31s", "postgres");
919932

920-
/* Maj Dev 8 */
921-
sprintf(&h[329], "%6o ", 0);
933+
/* Major Dev 8 */
934+
sprintf(&h[329], "%07o ", 0);
922935

923-
/* Min Dev 8 */
924-
sprintf(&h[337], "%6o ", 0);
936+
/* Minor Dev 8 */
937+
sprintf(&h[337], "%07o ", 0);
925938

926-
while ((sum = _tarChecksum(h)) != lastSum)
927-
{
928-
sprintf(&h[148], "%06o ", sum);
929-
lastSum = sum;
930-
}
939+
/* Prefix 155 - not used, leave as nulls */
940+
941+
/*
942+
* We mustn't overwrite the next field while inserting the checksum.
943+
* Fortunately, the checksum can't exceed 6 octal digits, so we just write
944+
* 6 digits, a space, and a null, which is legal per POSIX.
945+
*/
946+
sprintf(&h[148], "%06o ", _tarChecksum(h));
931947

948+
/* Now send the completed header. */
932949
pq_putmessage('d', h, 512);
933950
}

src/bin/pg_dump/pg_backup_tar.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -882,8 +882,10 @@ _CloseArchive(ArchiveHandle *AH)
882882

883883
tarClose(AH, th);
884884

885-
/* Add a block of NULLs since it's de-rigeur. */
886-
for (i = 0; i < 512; i++)
885+
/*
886+
* EOF marker for tar files is two blocks of NULLs.
887+
*/
888+
for (i = 0; i < 512 * 2; i++)
887889
{
888890
if (fputc(0, ctx->tarFH) == EOF)
889891
exit_horribly(modulename,
@@ -1032,11 +1034,16 @@ _tarChecksum(char *header)
10321034
int i,
10331035
sum;
10341036

1035-
sum = 0;
1037+
/*
1038+
* Per POSIX, the checksum is the simple sum of all bytes in the header,
1039+
* treating the bytes as unsigned, and treating the checksum field (at
1040+
* offset 148) as though it contained 8 spaces.
1041+
*/
1042+
sum = 8 * ' '; /* presumed value for checksum field */
10361043
for (i = 0; i < 512; i++)
10371044
if (i < 148 || i >= 156)
10381045
sum += 0xFF & header[i];
1039-
return sum + 256; /* Assume 8 blanks in checksum field */
1046+
return sum;
10401047
}
10411048

10421049
bool
@@ -1050,11 +1057,15 @@ isValidTarHeader(char *header)
10501057
if (sum != chk)
10511058
return false;
10521059

1053-
/* POSIX format */
1054-
if (strncmp(&header[257], "ustar00", 7) == 0)
1060+
/* POSIX tar format */
1061+
if (memcmp(&header[257], "ustar\0", 6) == 0 &&
1062+
memcmp(&header[263], "00", 2) == 0)
1063+
return true;
1064+
/* GNU tar format */
1065+
if (memcmp(&header[257], "ustar  \0", 8) == 0)
10551066
return true;
1056-
/* older format */
1057-
if (strncmp(&header[257], "ustar  ", 7) == 0)
1067+
/* not-quite-POSIX format written by pre-9.3 pg_dump */
1068+
if (memcmp(&header[257], "ustar00\0", 8) == 0)
10581069
return true;
10591070

10601071
return false;

0 commit comments

Comments
 (0)