From c981c3d267c719aa1f658733635e1e7072b6e46b Mon Sep 17 00:00:00 2001 From: mattprodani Date: Fri, 1 Dec 2023 15:13:17 -0500 Subject: [PATCH 1/3] Fix tarfile FilterError handling to skip member extraction In `tarfile` library, FilterError with error_level set to 0 correctly logged a debugging message but did not properly skip extraction of a member. Updates filter functions to return None when a FilterError is seen, as stated in docs. --- Lib/tarfile.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index ec32f9ba49b03f..114f6ed54836a6 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2309,21 +2309,21 @@ def _get_extract_tarinfo(self, member, filter_function, path): else: tarinfo = member - unfiltered = tarinfo + filtered = None try: - tarinfo = filter_function(tarinfo, path) + filtered = filter_function(tarinfo, path) except (OSError, FilterError) as e: self._handle_fatal_error(e) except ExtractError as e: self._handle_nonfatal_error(e) - if tarinfo is None: - self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) + if filtered is None: + self._dbg(2, "tarfile: Excluded %r" % tarinfo.name) return None # Prepare the link target for makelink(). - if tarinfo.islnk(): - tarinfo = copy.copy(tarinfo) - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - return tarinfo + if filtered.islnk(): + filtered = copy.copy(filtered) + filtered._link_target = os.path.join(path, filtered.linkname) + return filtered def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): """Extract from filtered tarinfo to disk""" From 191998a6a9c780e8b986a894413df36950b0a365 Mon Sep 17 00:00:00 2001 From: mattprodani Date: Fri, 1 Dec 2023 15:13:17 -0500 Subject: [PATCH 2/3] add and fix tests for filtererror --- Lib/tarfile.py | 22 +++++++++++++--------- Lib/test/test_tarfile.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 114f6ed54836a6..de6fcd00dcf404 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2309,21 +2309,25 @@ def _get_extract_tarinfo(self, member, filter_function, path): else: tarinfo = member - filtered = None + unfiltered = tarinfo try: - filtered = filter_function(tarinfo, path) - except (OSError, FilterError) as e: + tarinfo = filter_function(tarinfo, path) + except FilterError as e: + tarinfo = None + self._handle_fatal_error(e) + except OSError as e: self._handle_fatal_error(e) except ExtractError as e: self._handle_nonfatal_error(e) - if filtered is None: - self._dbg(2, "tarfile: Excluded %r" % tarinfo.name) + + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) return None # Prepare the link target for makelink(). - if filtered.islnk(): - filtered = copy.copy(filtered) - filtered._link_target = os.path.join(path, filtered.linkname) - return filtered + if tarinfo.islnk(): + tarinfo = copy.copy(tarinfo) + tarinfo._link_target = os.path.join(path, tarinfo.linkname) + return tarinfo def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): """Extract from filtered tarinfo to disk""" diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 761560bfbf8b53..c0120713af8187 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3141,15 +3141,30 @@ def check_files_present(self, directory): @contextmanager def extract_with_none(self, *attr_names): DIR = pathlib.Path(TEMPDIR) / "extractall_none" + self.tar.errorlevel = 0 - for member in self.tar.getmembers(): - for attr_name in attr_names: - setattr(member, attr_name, None) + + filter_function = self.tar._get_filter_function(self.extraction_filter) + with os_helper.temp_dir(DIR): - self.tar.extractall(DIR, filter='fully_trusted') + + # this makes sure we only test files which would have been extracted + # prior to changing metadata + # for example, special files are not extracted with the data filter, + # but 'mode' is required to identify them + filtered_members = [] + + for member in self.tar.getmembers(): + member = self.tar._get_extract_tarinfo(member, filter_function, DIR) + if member is not None: + for attr_name in attr_names: + setattr(member, attr_name, None) + filtered_members.append(member) + self.tar.extractall(DIR, members = filtered_members, filter=self.extraction_filter) self.check_files_present(DIR) yield DIR + def test_extractall_none_mtime(self): # mtimes of extracted files should be later than 'now' -- the mtime # of a previously created directory. @@ -3478,6 +3493,14 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None, for parent in path.parents: self.expected_paths.discard(parent) + def expect_file_skipped(self, name): + """ Check a single file extraction is skipped. E.g. due to a filter. """ + if self.raised_exception: + raise self.raised_exception + # use normpath() rather than resolve() so we don't follow symlinks + path = pathlib.Path(os.path.normpath(self.destdir / name)) + self.assertNotIn(path, self.expected_paths) + def expect_exception(self, exc_type, message_re='.'): with self.assertRaisesRegex(exc_type, message_re): if self.raised_exception is not None: @@ -4071,9 +4094,6 @@ def valueerror_filter(tarinfo, path): with self.check_context(arc.open(errorlevel=0), extracterror_filter): self.expect_file('file') - with self.check_context(arc.open(errorlevel=0), filtererror_filter): - self.expect_file('file') - with self.check_context(arc.open(errorlevel=0), oserror_filter): self.expect_file('file') @@ -4083,6 +4103,11 @@ def valueerror_filter(tarinfo, path): with self.check_context(arc.open(errorlevel=0), valueerror_filter): self.expect_exception(ValueError) + # If errorlevel is 0, FilterErrors are logged and member is skipped + + with self.check_context(arc.open(errorlevel=0), filtererror_filter): + self.expect_file_skipped('file') + # If 1, all fatal errors are raised with self.check_context(arc.open(errorlevel=1), extracterror_filter): From 98128230894761eef5d9e192d3a4dda76060daa5 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 12 Dec 2023 20:26:44 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst diff --git a/Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst b/Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst new file mode 100644 index 00000000000000..31de1cb5224c12 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst @@ -0,0 +1 @@ +Fix bug in :mod:`tarfile` where calling :func:`TarFile.extract()` or :func:`TarFile.extractall()` would continue to extract filtered unsafe files when :attr:`errorlevel` was set to 0. Rather, this fix implements a 'skip-but-log' behavior that ensures unsafe members are not extracted, as was expected from the docs.