Skip to content

Commit abfa16b

Browse files
barneygale and encukou authored
GH-114847: Speed up posixpath.realpath() (#114848)
Apply the following optimizations to `posixpath.realpath()`:

- Remove use of recursion
- Construct child paths directly rather than using `join()`
- Use `os.getcwd[b]()` rather than `abspath()`
- Use `startswith(sep)` rather than `isabs()`
- Use slicing rather than `split()`

Co-authored-by: Petr Viktorin <encukou@gmail.com>
1 parent 9ceaee7 commit abfa16b

File tree

3 files changed

+64
-34
lines changed

3 files changed

+64
-34
lines changed

Lib/posixpath.py

+54-34
Original file line number | Diff line number | Diff line change
@@ -403,55 +403,66 @@ def realpath(filename, *, strict=False):
403403
"""Return the canonical path of the specified filename, eliminating any
404404
symbolic links encountered in the path."""
405405
filename = os.fspath(filename)
406-
path, ok = _joinrealpath(filename[:0], filename, strict, {})
407-
return abspath(path)
408-
409-
# Join two paths, normalizing and eliminating any symbolic links
410-
# encountered in the second path.
411-
# Two leading slashes are replaced by a single slash.
412-
def _joinrealpath(path, rest, strict, seen):
413-
if isinstance(path, bytes):
406+
if isinstance(filename, bytes):
414407
sep = b'/'
415408
curdir = b'.'
416409
pardir = b'..'
410+
getcwd = os.getcwdb
417411
else:
418412
sep = '/'
419413
curdir = '.'
420414
pardir = '..'
415+
getcwd = os.getcwd
416+
417+
# The stack of unresolved path parts. When popped, a special value of None
418+
# indicates that a symlink target has been resolved, and that the original
419+
# symlink path can be retrieved by popping again. The [::-1] slice is a
420+
# very fast way of spelling list(reversed(...)).
421+
rest = filename.split(sep)[::-1]
422+
423+
# The resolved path, which is absolute throughout this function.
424+
# Note: getcwd() returns a normalized and symlink-free path.
425+
path = sep if filename.startswith(sep) else getcwd()
421426

422-
if rest.startswith(sep):
423-
rest = rest[1:]
424-
path = sep
427+
# Mapping from symlink paths to *fully resolved* symlink targets. If a
428+
# symlink is encountered but not yet resolved, the value is None. This is
429+
# used both to detect symlink loops and to speed up repeated traversals of
430+
# the same links.
431+
seen = {}
432+
433+
# Whether we're calling lstat() and readlink() to resolve symlinks. If we
434+
# encounter an OSError for a symlink loop in non-strict mode, this is
435+
# switched off.
436+
querying = True
425437

426438
while rest:
427-
name, _, rest = rest.partition(sep)
439+
name = rest.pop()
440+
if name is None:
441+
# resolved symlink target
442+
seen[rest.pop()] = path
443+
continue
428444
if not name or name == curdir:
429445
# current dir
430446
continue
431447
if name == pardir:
432448
# parent dir
433-
if path:
434-
parent, name = split(path)
435-
if name == pardir:
436-
# ../..
437-
path = join(path, pardir)
438-
else:
439-
# foo/bar/.. -> foo
440-
path = parent
441-
else:
442-
# ..
443-
path = pardir
449+
path = path[:path.rindex(sep)] or sep
450+
continue
451+
if path == sep:
452+
newpath = path + name
453+
else:
454+
newpath = path + sep + name
455+
if not querying:
456+
path = newpath
444457
continue
445-
newpath = join(path, name)
446458
try:
447459
st = os.lstat(newpath)
460+
if not stat.S_ISLNK(st.st_mode):
461+
path = newpath
462+
continue
448463
except OSError:
449464
if strict:
450465
raise
451-
is_link = False
452-
else:
453-
is_link = stat.S_ISLNK(st.st_mode)
454-
if not is_link:
455466
path = newpath
456467
continue
457468
# Resolve the symbolic link
@@ -467,14 +478,23 @@ def _joinrealpath(path, rest, strict, seen):
467478
os.stat(newpath)
468479
else:
469480
# Return already resolved part + rest of the path unchanged.
470-
return join(newpath, rest), False
481+
path = newpath
482+
querying = False
483+
continue
471484
seen[newpath] = None # not resolved symlink
472-
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
473-
if not ok:
474-
return join(path, rest), False
475-
seen[newpath] = path # resolved symlink
485+
target = os.readlink(newpath)
486+
if target.startswith(sep):
487+
# Symlink target is absolute; reset resolved path.
488+
path = sep
489+
# Push the symlink path onto the stack, and signal its specialness by
490+
# also pushing None. When these entries are popped, we'll record the
491+
# fully-resolved symlink target in the 'seen' mapping.
492+
rest.append(newpath)
493+
rest.append(None)
494+
# Push the unresolved symlink target parts onto the stack.
495+
rest.extend(target.split(sep)[::-1])
476496

477-
return path, True
497+
return path
478498

479499

480500
supports_unicode_filenames = (sys.platform == 'darwin')

Lib/test/test_posixpath.py

+9
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,15 @@ def test_realpath_relative(self):
456456
finally:
457457
os_helper.unlink(ABSTFN)
458458

459+
@os_helper.skip_unless_symlink
460+
@skip_if_ABSTFN_contains_backslash
461+
def test_realpath_missing_pardir(self):
462+
try:
463+
os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
464+
self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1")
465+
finally:
466+
os_helper.unlink(os_helper.TESTFN)
467+
459468
@os_helper.skip_unless_symlink
460469
@skip_if_ABSTFN_contains_backslash
461470
def test_realpath_symlink_loops(self):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Speed up :func:`os.path.realpath` on non-Windows platforms.

0 commit comments

Comments
 (0)