From f6d08e5baf1fddf8f98f3f3cfc3bb2772d14633b Mon Sep 17 00:00:00 2001 From: Artem Bulgakov Date: Mon, 7 Sep 2020 19:46:33 +0300 Subject: [PATCH] bpo-41316: Make tarfile follow specs for FNAME (GH-21511) tarfile writes the full path to the FNAME field of the GZIP header, instead of just the basename, when the user specifies an absolute path. Some archive viewers may then process the file incorrectly. It also creates a security issue, because anyone who receives the archive can learn the directory structure of the system, the username, or other personal information. RFC1952 says about FNAME: "This is the original name of the file being compressed, with any directory components removed." So tarfile must remove directory names from FNAME and write only the basename of the file. Automerge-Triggered-By: @jaraco (cherry picked from commit 22748a83d927d3da1beaed771be30887c42b2500) Co-authored-by: Artem Bulgakov --- Lib/tarfile.py | 2 ++ Lib/test/test_tarfile.py | 14 +++++++++++++- Misc/ACKS | 1 + .../2020-07-28-12-08-58.bpo-41316.bSCbK4.rst | 1 + 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 7a69e1b1aa5440..39f63b9cfc1ac9 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -420,6 +420,8 @@ def _init_write_gz(self): self.__write(b"\037\213\010\010" + timestamp + b"\002\377") if self.name.endswith(".gz"): self.name = self.name[:-3] + # Honor "directory components removed" from RFC1952 + self.name = os.path.basename(self.name) # RFC1952 says we must use ISO-8859-1 for the FNAME field. 
self.__write(self.name.encode("iso-8859-1", "replace") + NUL) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index b512168d6ea870..be717e315700ca 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1384,12 +1384,15 @@ def write(self, data): pax_headers={'non': 'empty'}) self.assertFalse(f.closed) + class GzipWriteTest(GzipTest, WriteTest): pass + class Bz2WriteTest(Bz2Test, WriteTest): pass + class LzmaWriteTest(LzmaTest, WriteTest): pass @@ -1432,8 +1435,17 @@ def test_file_mode(self): finally: os.umask(original_umask) + class GzipStreamWriteTest(GzipTest, StreamWriteTest): - pass + def test_source_directory_not_leaked(self): + """ + Ensure the source directory is not included in the tar header + per bpo-41316. + """ + tarfile.open(tmpname, self.mode).close() + payload = pathlib.Path(tmpname).read_text(encoding='latin-1') + assert os.path.dirname(tmpname) not in payload + class Bz2StreamWriteTest(Bz2Test, StreamWriteTest): decompressor = bz2.BZ2Decompressor if bz2 else None diff --git a/Misc/ACKS b/Misc/ACKS index a08e917b30765a..0c4aead52efa4b 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -236,6 +236,7 @@ Colm Buckley Erik de Bueger Jan-Hein Bührman Lars Buitinck +Artem Bulgakov Dick Bulterman Bill Bumgarner Jimmy Burgett diff --git a/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst b/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst new file mode 100644 index 00000000000000..139a170866ed49 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst @@ -0,0 +1 @@ +Fix the :mod:`tarfile` module to write only the basename of the TAR file to the GZIP compression header. \ No newline at end of file