Skip to content

Commit 42f87d4

Browse files
bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693)
Numeric fields of type float, notably mtime, can't be represented exactly in the ustar header, so their full-precision values are stored in the pax header. However, it is helpful to also set them to the nearest int (i.e. second- rather than nanosecond-precision mtimes) in the ustar header, for the benefit of unarchivers that don't understand the pax header. Add a test for tarfile.TarInfo.create_pax_header to confirm the correct behaviour. (cherry picked from commit bf2d44f) Co-authored-by: Joshua Root <jmr@macports.org>
1 parent d29bbc2 commit 42f87d4

File tree

3 files changed

+71
-6
lines changed

3 files changed

+71
-6
lines changed

Lib/tarfile.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -888,15 +888,24 @@ def create_pax_header(self, info, encoding):
888888
# Test number fields for values that exceed the field limit or values
889889
# that like to be stored as float.
890890
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
891-
if name in pax_headers:
892-
# The pax header has priority. Avoid overflow.
893-
info[name] = 0
894-
continue
891+
needs_pax = False
895892

896893
val = info[name]
897-
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
898-
pax_headers[name] = str(val)
894+
val_is_float = isinstance(val, float)
895+
val_int = round(val) if val_is_float else val
896+
if not 0 <= val_int < 8 ** (digits - 1):
897+
# Avoid overflow.
899898
info[name] = 0
899+
needs_pax = True
900+
elif val_is_float:
901+
# Put rounded value in ustar header, and full
902+
# precision value in pax header.
903+
info[name] = val_int
904+
needs_pax = True
905+
906+
# The existing pax header has priority.
907+
if needs_pax and name not in pax_headers:
908+
pax_headers[name] = str(val)
900909

901910
# Create a pax extended header if necessary.
902911
if pax_headers:

Lib/test/test_tarfile.py

+55
Original file line numberDiff line numberDiff line change
@@ -1899,6 +1899,61 @@ def test_pax_extended_header(self):
18991899
finally:
19001900
tar.close()
19011901

1902+
def test_create_pax_header(self):
1903+
# The ustar header should contain values that can be
1904+
# represented reasonably, even if a better (e.g. higher
1905+
# precision) version is set in the pax header.
1906+
# Issue #45863
1907+
1908+
# values that should be kept
1909+
t = tarfile.TarInfo()
1910+
t.name = "foo"
1911+
t.mtime = 1000.1
1912+
t.size = 100
1913+
t.uid = 123
1914+
t.gid = 124
1915+
info = t.get_info()
1916+
header = t.create_pax_header(info, encoding="iso8859-1")
1917+
self.assertEqual(info['name'], "foo")
1918+
# mtime should be rounded to nearest second
1919+
self.assertIsInstance(info['mtime'], int)
1920+
self.assertEqual(info['mtime'], 1000)
1921+
self.assertEqual(info['size'], 100)
1922+
self.assertEqual(info['uid'], 123)
1923+
self.assertEqual(info['gid'], 124)
1924+
self.assertEqual(header,
1925+
b'././@PaxHeader' + bytes(86) \
1926+
+ b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
1927+
+ bytes(100) + b'ustar\x0000'+ bytes(247) \
1928+
+ b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
1929+
+ b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
1930+
+ bytes(100) + b'ustar\x0000' + bytes(247))
1931+
1932+
# values that should be changed
1933+
t = tarfile.TarInfo()
1934+
t.name = "foo\u3374" # can't be represented in ascii
1935+
t.mtime = 10**10 # too big
1936+
t.size = 10**10 # too big
1937+
t.uid = 8**8 # too big
1938+
t.gid = 8**8+1 # too big
1939+
info = t.get_info()
1940+
header = t.create_pax_header(info, encoding="iso8859-1")
1941+
# name is kept as-is in info but should be added to pax header
1942+
self.assertEqual(info['name'], "foo\u3374")
1943+
self.assertEqual(info['mtime'], 0)
1944+
self.assertEqual(info['size'], 0)
1945+
self.assertEqual(info['uid'], 0)
1946+
self.assertEqual(info['gid'], 0)
1947+
self.assertEqual(header,
1948+
b'././@PaxHeader' + bytes(86) \
1949+
+ b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
1950+
+ bytes(100) + b'ustar\x0000' + bytes(247) \
1951+
+ b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
1952+
+ b'16 gid=16777217\n20 size=10000000000\n' \
1953+
+ b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
1954+
+ b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
1955+
+ bytes(100) + b'ustar\x0000' + bytes(247))
1956+
19021957

19031958
class UnicodeTest:
19041959

Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
When the :mod:`tarfile` module creates a pax format archive, it now also puts an integer representation of timestamps in the ustar header (if possible) for the benefit of older unarchivers, in addition to the existing full-precision timestamps in the pax extended header.

0 commit comments

Comments
 (0)