From 0684c9a5b68a54e2b0b0506fee029abea13a07eb Mon Sep 17 00:00:00 2001 From: Joshua Root Date: Mon, 22 Nov 2021 18:39:24 +1100 Subject: [PATCH 1/2] bpo-45863: tarfile: don't zero out header fields unnecessarily Numeric fields of type float, notably mtime, can't be represented exactly in the ustar header, so the pax header is used. But it is helpful to set them to the nearest int (i.e. second rather than nanosecond precision mtimes) in the ustar header as well, for the benefit of unarchivers that don't understand the pax header. Add test for tarfile.TarInfo.create_pax_header to confirm correct behaviour. --- Lib/tarfile.py | 21 ++++++++++----- Lib/test/test_tarfile.py | 55 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index e187da2b1994a6..6ada9a05db9cbe 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -888,15 +888,24 @@ def create_pax_header(self, info, encoding): # Test number fields for values that exceed the field limit or values # that like to be stored as float. for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): - if name in pax_headers: - # The pax header has priority. Avoid overflow. - info[name] = 0 - continue + needs_pax = False val = info[name] - if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = str(val) + val_is_float = isinstance(val, float) + val_int = round(val) if val_is_float else val + if not 0 <= val_int < 8 ** (digits - 1): + # Avoid overflow. info[name] = 0 + needs_pax = True + elif val_is_float: + # Put rounded value in ustar header, and full + # precision value in pax header. + info[name] = val_int + needs_pax = True + + # The existing pax header has priority. + if needs_pax and name not in pax_headers: + pax_headers[name] = str(val) # Create a pax extended header if necessary. if pax_headers: diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 1357df57eb1797..4bf1ba38f5ea5a 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1899,6 +1899,61 @@ def test_pax_extended_header(self): finally: tar.close() + def test_create_pax_header(self): + # The ustar header should contain values that can be + # represented reasonably, even if a better (e.g. higher + # precision) version is set in the pax header. + # Issue #45863 + + # values that should be kept + t = tarfile.TarInfo() + t.name = "foo" + t.mtime = 1000.1 + t.size = 100 + t.uid = 123 + t.gid = 124 + info = t.get_info() + header = t.create_pax_header(info, encoding="iso8859-1") + self.assertEqual(info['name'], "foo") + # mtime should be rounded to nearest second + self.assertIsInstance(info['mtime'], int) + self.assertEqual(info['mtime'], 1000) + self.assertEqual(info['size'], 100) + self.assertEqual(info['uid'], 123) + self.assertEqual(info['gid'], 124) + self.assertEqual(header, + b'././@PaxHeader' + bytes(86) \ + + b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \ + + bytes(100) + b'ustar\x0000'+ bytes(247) \ + + b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \ + + b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \ + + bytes(100) + b'ustar\x0000' + bytes(247)) + + # values that should be changed + t = tarfile.TarInfo() + t.name = "foo\u3374" # can't be represented in ascii + t.mtime = 10**10 # too big + t.size = 10**10 # too big + t.uid = 8**8 # too big + t.gid = 8**8+1 # too big + info = t.get_info() + header = t.create_pax_header(info, encoding="iso8859-1") + # name is kept as-is in info but should be added to pax header + self.assertEqual(info['name'], "foo\u3374") + self.assertEqual(info['mtime'], 0) + self.assertEqual(info['size'], 0) + self.assertEqual(info['uid'], 0) + self.assertEqual(info['gid'], 0) + self.assertEqual(header, + b'././@PaxHeader' + bytes(86) \ + + b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \ + + bytes(100) + b'ustar\x0000' + bytes(247) \ + + b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \ + + b'16 gid=16777217\n20 size=10000000000\n' \ + + b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \ + + b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \ + + bytes(100) + b'ustar\x0000' + bytes(247)) + class UnicodeTest: From e233e91f1042df471c6b4dbabb4ed110b06e275c Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 9 Feb 2022 00:53:24 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst diff --git a/Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst b/Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst new file mode 100644 index 00000000000000..3a1335cc77b9d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst @@ -0,0 +1 @@ +When the :mod:`tarfile` module creates a pax format archive, it will put an integer representation of timestamps in the ustar header (if possible) for the benefit of older unarchivers, in addition to the existing full-precision timestamps in the pax extended header. \ No newline at end of file