diff --git a/Lib/tarfile.py b/Lib/tarfile.py index ec32f9ba49b03f..de6fcd00dcf404 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2312,10 +2312,14 @@ def _get_extract_tarinfo(self, member, filter_function, path): unfiltered = tarinfo try: tarinfo = filter_function(tarinfo, path) - except (OSError, FilterError) as e: + except FilterError as e: + tarinfo = None + self._handle_fatal_error(e) + except OSError as e: self._handle_fatal_error(e) except ExtractError as e: self._handle_nonfatal_error(e) + if tarinfo is None: self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) return None diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 761560bfbf8b53..c0120713af8187 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3141,15 +3141,30 @@ def check_files_present(self, directory): @contextmanager def extract_with_none(self, *attr_names): DIR = pathlib.Path(TEMPDIR) / "extractall_none" + self.tar.errorlevel = 0 - for member in self.tar.getmembers(): - for attr_name in attr_names: - setattr(member, attr_name, None) + + filter_function = self.tar._get_filter_function(self.extraction_filter) + with os_helper.temp_dir(DIR): - self.tar.extractall(DIR, filter='fully_trusted') + + # this makes sure we only test files which would have been extracted + # prior to changing metadata + # for example, special files are not extracted with the data filter, + # but 'mode' is required to identify them + filtered_members = [] + + for member in self.tar.getmembers(): + member = self.tar._get_extract_tarinfo(member, filter_function, DIR) + if member is not None: + for attr_name in attr_names: + setattr(member, attr_name, None) + filtered_members.append(member) + self.tar.extractall(DIR, members = filtered_members, filter=self.extraction_filter) self.check_files_present(DIR) yield DIR + def test_extractall_none_mtime(self): # mtimes of extracted files should be later than 'now' -- the mtime # of a previously created directory. @@ -3478,6 +3493,14 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None, for parent in path.parents: self.expected_paths.discard(parent) + def expect_file_skipped(self, name): + """ Check a single file extraction is skipped. E.g. due to a filter. """ + if self.raised_exception: + raise self.raised_exception + # use normpath() rather than resolve() so we don't follow symlinks + path = pathlib.Path(os.path.normpath(self.destdir / name)) + self.assertNotIn(path, self.expected_paths) + def expect_exception(self, exc_type, message_re='.'): with self.assertRaisesRegex(exc_type, message_re): if self.raised_exception is not None: @@ -4071,9 +4094,6 @@ def valueerror_filter(tarinfo, path): with self.check_context(arc.open(errorlevel=0), extracterror_filter): self.expect_file('file') - with self.check_context(arc.open(errorlevel=0), filtererror_filter): - self.expect_file('file') - with self.check_context(arc.open(errorlevel=0), oserror_filter): self.expect_file('file') @@ -4083,6 +4103,11 @@ def valueerror_filter(tarinfo, path): with self.check_context(arc.open(errorlevel=0), valueerror_filter): self.expect_exception(ValueError) + # If errorlevel is 0, FilterErrors are logged and member is skipped + + with self.check_context(arc.open(errorlevel=0), filtererror_filter): + self.expect_file_skipped('file') + # If 1, all fatal errors are raised with self.check_context(arc.open(errorlevel=1), extracterror_filter): diff --git a/Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst b/Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst new file mode 100644 index 00000000000000..31de1cb5224c12 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-12-20-26-43.gh-issue-112887.2DbuVm.rst @@ -0,0 +1 @@ +Fix bug in :mod:`tarfile` where calling :func:`TarFile.extract()` or :func:`TarFile.extractall()` would continue to extract filtered unsafe files when :attr:`errorlevel` was set to 0. Rather, this fix implements a 'skip-but-log' behavior that ensures unsafe members are not extracted, as was expected from the docs.