Skip to content

gh-130819: Update tarfile.py#_create_gnu_long_header to align with GNU Tar #130820

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,15 @@ sysconfig
(Contributed by Xuehai Pan in :gh:`131799`.)


tarfile
-------

* Emit ``mode``, ``uname`` and ``gname`` fields for long paths in
:mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU
``tar(1)``.
Comment on lines +1182 to +1183
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
:mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU
``tar(1)``.
:mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU
:manpage:`tar(1)`.

(Contributed by Dahan Gong in :gh:`130820`.)


threading
---------

Expand Down
52 changes: 36 additions & 16 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,29 @@ def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
dst.write(buf)
return

def _get_user_group_names(uid, gid, unames_cache, gnames_cache):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make it a method of TarFile. We don't use it elsewhere.

# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To speed
# things up, cache the resolved usernames and group names.
Comment on lines +262 to +263
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To speed
# things up, cache the resolved usernames and group names.
# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive.
# To speed things up, cache the resolved usernames and group names.

if pwd:
if uid not in unames_cache:
try:
unames_cache[uid] = pwd.getpwuid(uid)[0]
except KeyError:
unames_cache[uid] = ''
uname = unames_cache[uid]
else:
uname = None
if grp:
if gid not in gnames_cache:
try:
gnames_cache[gid] = grp.getgrgid(gid)[0]
except KeyError:
gnames_cache[gid] = ''
gname = gnames_cache[gid]
else:
gname = None
return uname, gname

def _safe_print(s):
encoding = getattr(sys.stdout, 'encoding', None)
if encoding is not None:
Expand Down Expand Up @@ -883,6 +906,9 @@ class TarInfo(object):
_link_target = None,
)

_unames = {} # Cached mappings of uid=0 -> uname
_gnames = {} # Cached mappings of gid=0 -> gname
Comment on lines +909 to +910
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer that we keep per-instance caches instead of per-class caches, even for 0.


def __init__(self, name=""):
"""Construct a TarInfo object. name is the optional name
of the member.
Expand Down Expand Up @@ -1190,6 +1216,10 @@ def _create_gnu_long_header(cls, name, type, encoding, errors):
info["type"] = type
info["size"] = len(name)
info["magic"] = GNU_MAGIC
info["mode"] = 0o100644
uname, gname = _get_user_group_names(0, 0, cls._unames, cls._gnames)
info["uname"] = uname or ""
info["gname"] = gname or ""

# create extended header + name blocks.
return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Expand Down Expand Up @@ -2141,22 +2171,12 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None):
tarinfo.type = type
tarinfo.linkname = linkname

# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To
# speed things up, cache the resolved usernames and group names.
if pwd:
if tarinfo.uid not in self._unames:
try:
self._unames[tarinfo.uid] = pwd.getpwuid(tarinfo.uid)[0]
except KeyError:
self._unames[tarinfo.uid] = ''
tarinfo.uname = self._unames[tarinfo.uid]
if grp:
if tarinfo.gid not in self._gnames:
try:
self._gnames[tarinfo.gid] = grp.getgrgid(tarinfo.gid)[0]
except KeyError:
self._gnames[tarinfo.gid] = ''
tarinfo.gname = self._gnames[tarinfo.gid]
uname, gname = _get_user_group_names(tarinfo.uid, tarinfo.gid,
self._unames, self._gnames)
if uname != None:
tarinfo.uname = uname
if gname != None:
tarinfo.gname = gname
Comment on lines +2176 to +2179
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if uname != None:
tarinfo.uname = uname
if gname != None:
tarinfo.gname = gname
if uname is not None:
tarinfo.uname = uname
if gname is not None:
tarinfo.gname = gname


if type in (CHRTYPE, BLKTYPE):
if hasattr(os, "major") and hasattr(os, "minor"):
Expand Down
34 changes: 34 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1878,6 +1878,40 @@ def test_longnamelink_1025(self):
self._test(("longnam/" * 127) + "longname_",
("longlnk/" * 127) + "longlink_")

def test_hidden_header_for_longname(self):
# Regression test for gh-130819.
memory_file = io.BytesIO()
tar = tarfile.open(mode="w", fileobj=memory_file, format=tarfile.GNU_FORMAT)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we use a context manager here?

tar_info = tarfile.TarInfo("abcdef" * 20)
tar_info.type = tarfile.DIRTYPE
tar.addfile(tar_info, None)
tar.close()

class RawTabInfo(tarfile.TarInfo):

def _proc_member(self, tar_file):
if self.type in (tarfile.GNUTYPE_LONGNAME, tarfile.GNUTYPE_LONGLINK):
tester.assertEqual(self.mode, 0o644)
unames_cache = RawTabInfo._unames
gnames_cache = RawTabInfo._gnames
if unames_cache:
tester.assertIn(0, unames_cache)
if gnames_cache:
tester.assertIn(0, gnames_cache)
tester.assertEqual(self.uname, unames_cache.get(0, ""))
tester.assertEqual(self.gname, gnames_cache.get(0, ""))
return super()._proc_member(tar_file) # type: ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return super()._proc_member(tar_file) # type: ignore
return super()._proc_member(tar_file)


tester = self
memory_file.seek(0)
tar = tarfile.open(fileobj=memory_file, mode="r", tarinfo=RawTabInfo)
try:
members = tar.getmembers()
self.assertEqual(len(members), 1)
finally:
tar.close()
memory_file.close()
Comment on lines +1907 to +1913
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
tar = tarfile.open(fileobj=memory_file, mode="r", tarinfo=RawTabInfo)
try:
members = tar.getmembers()
self.assertEqual(len(members), 1)
finally:
tar.close()
memory_file.close()
tar = tarfile.open(fileobj=memory_file, mode="r", tarinfo=RawTabInfo)
self.addCleanup(tar.close)
members = tar.getmembers()
self.assertEqual(len(members), 1)

The memory file does not need to be closed as it's in-memory only.



class DeviceHeaderTest(WriteTestBase, unittest.TestCase):

Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ Mikhail Golubev
Marta Gómez Macías
Guilherme Gonçalves
Tiago Gonçalves
Dahan Gong
Chris Gonnerman
Shelley Gooch
David Goodger
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Emit ``mode``, ``uname`` and ``gname`` fields for long paths in
:mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU
``tar(1)``.
Loading