From f47106dec6d3c18277cf006706e8984a038d00f8 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 13 Jun 2018 00:30:23 +0200 Subject: [PATCH 01/15] Have shutil.copytree(), copy() and copystat() use cached scandir() stat()s --- Lib/shutil.py | 67 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 09a5727ab4645f..7f5afcdb39dbc6 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -204,6 +204,12 @@ def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE): def _samefile(src, dst): # Macintosh, Unix. + if isinstance(src, os.DirEntry) and hasattr(os.path, 'samestat'): + try: + return os.path.samestat(src.stat(), os.stat(dst)) + except OSError: + return False + if hasattr(os.path, 'samefile'): try: return os.path.samefile(src, dst) @@ -214,6 +220,15 @@ def _samefile(src, dst): return (os.path.normcase(os.path.abspath(src)) == os.path.normcase(os.path.abspath(dst))) +def _stat(fn): + return fn.stat() if isinstance(fn, os.DirEntry) else os.stat(fn) + +def _islink(fn): + return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn) + +def _isdir(fn): + return fn.is_dir() if isinstance(fn, os.DirEntry) else os.path.isdir(fn) + def copyfile(src, dst, *, follow_symlinks=True): """Copy data from src to dst. @@ -226,16 +241,17 @@ def copyfile(src, dst, *, follow_symlinks=True): for fn in [src, dst]: try: - st = os.stat(fn) + st = _stat(fn) except OSError: # File most likely does not exist pass else: # XXX What about other special files? (sockets, devices...) if stat.S_ISFIFO(st.st_mode): + fn = fn.path if isinstance(fn, os.DirEntry) else fn raise SpecialFileError("`%s` is a named pipe" % fn) - if not follow_symlinks and os.path.islink(src): + if not follow_symlinks and _islink(src): os.symlink(os.readlink(src), dst) else: with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: @@ -265,13 +281,13 @@ def copymode(src, dst, *, follow_symlinks=True): (e.g. Linux) this method does nothing. """ - if not follow_symlinks and os.path.islink(src) and os.path.islink(dst): + if not follow_symlinks and _islink(src) and os.path.islink(dst): if hasattr(os, 'lchmod'): stat_func, chmod_func = os.lstat, os.lchmod else: return elif hasattr(os, 'chmod'): - stat_func, chmod_func = os.stat, os.chmod + stat_func, chmod_func = _stat, os.chmod else: return @@ -316,7 +332,7 @@ def _nop(*args, ns=None, follow_symlinks=None): pass # follow symlinks (aka don't not follow symlinks) - follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst)) + follow = follow_symlinks or not (_islink(src) and os.path.islink(dst)) if follow: # use the real function if it exists def lookup(name): @@ -330,7 +346,10 @@ def lookup(name): return fn return _nop - st = lookup("stat")(src, follow_symlinks=follow) + if follow and isinstance(src, os.DirEntry): + st = src.stat() + else: + st = lookup("stat")(src, follow_symlinks=follow) mode = stat.S_IMODE(st.st_mode) lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns), follow_symlinks=follow) @@ -440,43 +459,51 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, function that supports the same signature (like copy()) can be used. """ - names = os.listdir(src) + # TODO: use with statement; omitted just to make code review easier + # (will add later) + entries = os.scandir(src) if ignore is not None: + ls = [] + names = set() + for entry in entries: + ls.append(entry) + names.add(entry.name) ignored_names = ignore(src, names) + entries = ls else: ignored_names = set() os.makedirs(dst) errors = [] - for name in names: - if name in ignored_names: + for srcentry in entries: + if srcentry.name in ignored_names: continue - srcname = os.path.join(src, name) - dstname = os.path.join(dst, name) + srcname = os.path.join(src, srcentry.name) + dstname = os.path.join(dst, srcentry.name) try: - if os.path.islink(srcname): + if srcentry.is_symlink(): linkto = os.readlink(srcname) if symlinks: # We can't just leave it to `copy_function` because legacy # code with a custom `copy_function` may rely on copytree # doing the right thing. os.symlink(linkto, dstname) - copystat(srcname, dstname, follow_symlinks=not symlinks) + copystat(srcentry, dstname, follow_symlinks=not symlinks) else: # ignore dangling symlink if the flag is on if not os.path.exists(linkto) and ignore_dangling_symlinks: continue # otherwise let the copy occurs. copy2 will raise an error - if os.path.isdir(srcname): - copytree(srcname, dstname, symlinks, ignore, + if srcentry.is_dir(): + copytree(srcentry, dstname, symlinks, ignore, copy_function) else: - copy_function(srcname, dstname) - elif os.path.isdir(srcname): - copytree(srcname, dstname, symlinks, ignore, copy_function) + copy_function(srcentry, dstname) + elif srcentry.is_dir(): + copytree(srcentry, dstname, symlinks, ignore, copy_function) else: # Will raise a SpecialFileError for unsupported file types - copy_function(srcname, dstname) + copy_function(srcentry, dstname) # catch the Error from the recursive copytree so that we can # continue with other files except Error as err: @@ -515,7 +542,7 @@ def _rmtree_unsafe(path, onerror): # os.scandir or entry.is_dir above. raise OSError("Cannot call rmtree on a symbolic link") except OSError: - onerror(os.path.islink, fullname, sys.exc_info()) + onerror(entry.is_symlink, fullname, sys.exc_info()) continue _rmtree_unsafe(fullname, onerror) else: From b5cf41616ac15234c8abe18cdf46f96edc873c26 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 20 Jun 2018 22:19:37 +0200 Subject: [PATCH 02/15] close scandir() context --- Lib/shutil.py | 112 +++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 7f5afcdb39dbc6..72ed9d929daedc 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -461,64 +461,64 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, """ # TODO: use with statement; omitted just to make code review easier # (will add later) - entries = os.scandir(src) - if ignore is not None: - ls = [] - names = set() - for entry in entries: - ls.append(entry) - names.add(entry.name) - ignored_names = ignore(src, names) - entries = ls - else: - ignored_names = set() - - os.makedirs(dst) - errors = [] - for srcentry in entries: - if srcentry.name in ignored_names: - continue - srcname = os.path.join(src, srcentry.name) - dstname = os.path.join(dst, srcentry.name) - try: - if srcentry.is_symlink(): - linkto = os.readlink(srcname) - if symlinks: - # We can't just leave it to `copy_function` because legacy - # code with a custom `copy_function` may rely on copytree - # doing the right thing. - os.symlink(linkto, dstname) - copystat(srcentry, dstname, follow_symlinks=not symlinks) - else: - # ignore dangling symlink if the flag is on - if not os.path.exists(linkto) and ignore_dangling_symlinks: - continue - # otherwise let the copy occurs. copy2 will raise an error - if srcentry.is_dir(): - copytree(srcentry, dstname, symlinks, ignore, - copy_function) + with os.scandir(src) as entries: + if ignore is not None: + ls = [] + names = set() + for entry in entries: + ls.append(entry) + names.add(entry.name) + ignored_names = ignore(src, names) + entries = ls + else: + ignored_names = set() + + os.makedirs(dst) + errors = [] + for srcentry in entries: + if srcentry.name in ignored_names: + continue + srcname = os.path.join(src, srcentry.name) + dstname = os.path.join(dst, srcentry.name) + try: + if srcentry.is_symlink(): + linkto = os.readlink(srcname) + if symlinks: + # We can't just leave it to `copy_function` because + # legacy code with a custom `copy_function` may rely + # on copytree doing the right thing. + os.symlink(linkto, dstname) + copystat(srcentry, dstname, follow_symlinks=not symlinks) else: - copy_function(srcentry, dstname) - elif srcentry.is_dir(): - copytree(srcentry, dstname, symlinks, ignore, copy_function) - else: - # Will raise a SpecialFileError for unsupported file types - copy_function(srcentry, dstname) - # catch the Error from the recursive copytree so that we can - # continue with other files - except Error as err: - errors.extend(err.args[0]) + # ignore dangling symlink if the flag is on + if not os.path.exists(linkto) and ignore_dangling_symlinks: + continue + # otherwise let the copy occurs. copy2 will raise an error + if srcentry.is_dir(): + copytree(srcentry, dstname, symlinks, ignore, + copy_function) + else: + copy_function(srcentry, dstname) + elif srcentry.is_dir(): + copytree(srcentry, dstname, symlinks, ignore, copy_function) + else: + # Will raise a SpecialFileError for unsupported file types + copy_function(srcentry, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) + except OSError as why: + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) except OSError as why: - errors.append((srcname, dstname, str(why))) - try: - copystat(src, dst) - except OSError as why: - # Copying file access times may fail on Windows - if getattr(why, 'winerror', None) is None: - errors.append((src, dst, str(why))) - if errors: - raise Error(errors) - return dst + # Copying file access times may fail on Windows + if getattr(why, 'winerror', None) is None: + errors.append((src, dst, str(why))) + if errors: + raise Error(errors) + return dst # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): From e588b4c7b91f47326126abccb93a29cb9765fe11 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Sat, 23 Jun 2018 11:14:40 +0200 Subject: [PATCH 03/15] use global vars instead of hasattr() --- Lib/shutil.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 241081524529b4..ba679cda6b3960 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -53,6 +53,10 @@ COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 16 * 1024 _HAS_SENDFILE = posix and hasattr(os, "sendfile") _HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS +_HAS_SAMESTAT = hasattr(os.path, 'samestat') +_HAS_SAMEFILE = hasattr(os.path, 'samefile') +_HAS_LCHMOD = hasattr(os, 'lchmod') +_HAS_CHMOD = hasattr(os, 'chmod') __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", "copytree", "move", "rmtree", "Error", "SpecialFileError", @@ -201,13 +205,13 @@ def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE): def _samefile(src, dst): # Macintosh, Unix. - if isinstance(src, os.DirEntry) and hasattr(os.path, 'samestat'): + if isinstance(src, os.DirEntry) and _HAS_SAMESTAT: try: return os.path.samestat(src.stat(), os.stat(dst)) except OSError: return False - if hasattr(os.path, 'samefile'): + if _HAS_SAMEFILE: try: return os.path.samefile(src, dst) except OSError: @@ -288,11 +292,11 @@ def copymode(src, dst, *, follow_symlinks=True): """ if not follow_symlinks and _islink(src) and os.path.islink(dst): - if hasattr(os, 'lchmod'): + if _HAS_LCHMOD: stat_func, chmod_func = os.lstat, os.lchmod else: return - elif hasattr(os, 'chmod'): + elif _HAS_CHMOD: stat_func, chmod_func = _stat, os.chmod else: return From 209716336c20aafcf481487b52a2086b03a77c93 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Sat, 23 Jun 2018 11:35:52 +0200 Subject: [PATCH 04/15] remove unused function --- Lib/shutil.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index ba679cda6b3960..9d5db7c416bffb 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -227,9 +227,6 @@ def _stat(fn): def _islink(fn): return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn) -def _isdir(fn): - return fn.is_dir() if isinstance(fn, os.DirEntry) else os.path.isdir(fn) - def copyfile(src, dst, *, follow_symlinks=True): """Copy data from src to dst in the most efficient way possible. From 627405fdfc450c699c247459d27ce156bdaa8e8d Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Sat, 23 Jun 2018 12:32:16 +0200 Subject: [PATCH 05/15] remove ctx manager around os.scandir() to make review easier --- Lib/shutil.py | 112 +++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 9d5db7c416bffb..1ce814715f794d 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -468,64 +468,64 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, """ # TODO: use with statement; omitted just to make code review easier # (will add later) - with os.scandir(src) as entries: - if ignore is not None: - ls = [] - names = set() - for entry in entries: - ls.append(entry) - names.add(entry.name) - ignored_names = ignore(src, names) - entries = ls - else: - ignored_names = set() - - os.makedirs(dst) - errors = [] - for srcentry in entries: - if srcentry.name in ignored_names: - continue - srcname = os.path.join(src, srcentry.name) - dstname = os.path.join(dst, srcentry.name) - try: - if srcentry.is_symlink(): - linkto = os.readlink(srcname) - if symlinks: - # We can't just leave it to `copy_function` because - # legacy code with a custom `copy_function` may rely - # on copytree doing the right thing. - os.symlink(linkto, dstname) - copystat(srcentry, dstname, follow_symlinks=not symlinks) - else: - # ignore dangling symlink if the flag is on - if not os.path.exists(linkto) and ignore_dangling_symlinks: - continue - # otherwise let the copy occurs. copy2 will raise an error - if srcentry.is_dir(): - copytree(srcentry, dstname, symlinks, ignore, - copy_function) - else: - copy_function(srcentry, dstname) - elif srcentry.is_dir(): - copytree(srcentry, dstname, symlinks, ignore, copy_function) - else: - # Will raise a SpecialFileError for unsupported file types - copy_function(srcentry, dstname) - # catch the Error from the recursive copytree so that we can - # continue with other files - except Error as err: - errors.extend(err.args[0]) - except OSError as why: - errors.append((srcname, dstname, str(why))) + entries = os.scandir(src) + if ignore is not None: + ls = [] + names = set() + for entry in entries: + ls.append(entry) + names.add(entry.name) + ignored_names = ignore(src, names) + entries = ls + else: + ignored_names = set() + + os.makedirs(dst) + errors = [] + for srcentry in entries: + if srcentry.name in ignored_names: + continue + srcname = os.path.join(src, srcentry.name) + dstname = os.path.join(dst, srcentry.name) try: - copystat(src, dst) + if srcentry.is_symlink(): + linkto = os.readlink(srcname) + if symlinks: + # We can't just leave it to `copy_function` because + # legacy code with a custom `copy_function` may rely + # on copytree doing the right thing. + os.symlink(linkto, dstname) + copystat(srcentry, dstname, follow_symlinks=not symlinks) + else: + # ignore dangling symlink if the flag is on + if not os.path.exists(linkto) and ignore_dangling_symlinks: + continue + # otherwise let the copy occurs. copy2 will raise an error + if srcentry.is_dir(): + copytree(srcentry, dstname, symlinks, ignore, + copy_function) + else: + copy_function(srcentry, dstname) + elif srcentry.is_dir(): + copytree(srcentry, dstname, symlinks, ignore, copy_function) + else: + # Will raise a SpecialFileError for unsupported file types + copy_function(srcentry, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) except OSError as why: - # Copying file access times may fail on Windows - if getattr(why, 'winerror', None) is None: - errors.append((src, dst, str(why))) - if errors: - raise Error(errors) - return dst + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) + except OSError as why: + # Copying file access times may fail on Windows + if getattr(why, 'winerror', None) is None: + errors.append((src, dst, str(why))) + if errors: + raise Error(errors) + return dst # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): From 1cb0bffcf1bbb1d6f2d053acf64796cc0d0fcd01 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Sat, 23 Jun 2018 12:32:48 +0200 Subject: [PATCH 06/15] remove ctx manager around os.scandir() to make review easier --- Lib/shutil.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 1ce814715f794d..242298c50440d9 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -491,9 +491,9 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, if srcentry.is_symlink(): linkto = os.readlink(srcname) if symlinks: - # We can't just leave it to `copy_function` because - # legacy code with a custom `copy_function` may rely - # on copytree doing the right thing. + # We can't just leave it to `copy_function` because legacy + # code with a custom `copy_function` may rely on copytree + # doing the right thing. os.symlink(linkto, dstname) copystat(srcentry, dstname, follow_symlinks=not symlinks) else: From 4da7f7636f1e4eb9b10adef44137840b3d76d2ba Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Sat, 23 Jun 2018 12:48:19 +0200 Subject: [PATCH 07/15] update doc --- Doc/whatsnew/3.8.rst | 7 +++++++ .../next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst | 6 ++++++ 2 files changed, 13 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 542e84feaa6f5f..84408415e5f99b 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -123,6 +123,13 @@ Optimizations See :ref:`shutil-platform-dependent-efficient-copy-operations` section. (Contributed by Giampaolo Rodola' in :issue:`25427`.) +* :func:`shutil.copytree` uses :func:`os.scandir` function and all copy + functions depending from it use cached :func:`os.stat` values. The speedup + for copying a directory with 8000 files is around +9% on Linux, +20% on + Windows and +30% on a Windows SMB share. Also the number of :func:`os.stat` + syscalls is reduced by 38% making :func:`shutil.copytree` especially faster + on network filesystems. + * The default protocol in the :mod:`pickle` module is now Protocol 4, first introduced in Python 3.4. It offers better performance and smaller size compared to Protocol 3 available since Python 3.0. diff --git a/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst b/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst new file mode 100644 index 00000000000000..d1f5fef5634b01 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst @@ -0,0 +1,6 @@ +:func:`shutil.copytree` uses :func:`os.scandir` function and all copy +functions depending from it use cached :func:`os.stat` values. The speedup +for copying a directory with 8000 files is around +9% on Linux, +20% on +Windows and + 30% on a Windows SMB share. Also the number of :func:`os.stat` +syscalls is reduced by 38% making :func:`shutil.copytree` especially faster +on network filesystems. From a8479448f449d4e538dc255fc47d4ce4387af33f Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Sat, 23 Jun 2018 12:56:02 +0200 Subject: [PATCH 08/15] add ref to ticket number and credits --- Doc/whatsnew/3.8.rst | 3 ++- .../next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 84408415e5f99b..0c48ed851dbf84 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -128,7 +128,8 @@ Optimizations for copying a directory with 8000 files is around +9% on Linux, +20% on Windows and +30% on a Windows SMB share. Also the number of :func:`os.stat` syscalls is reduced by 38% making :func:`shutil.copytree` especially faster - on network filesystems. + on network filesystems. (Contributed by Giampaolo Rodola' in :issue:`33695`.) + * The default protocol in the :mod:`pickle` module is now Protocol 4, first introduced in Python 3.4. It offers better performance and smaller diff --git a/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst b/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst index d1f5fef5634b01..21950453b0ad51 100644 --- a/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst +++ b/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst @@ -4,3 +4,4 @@ for copying a directory with 8000 files is around +9% on Linux, +20% on Windows and + 30% on a Windows SMB share. Also the number of :func:`os.stat` syscalls is reduced by 38% making :func:`shutil.copytree` especially faster on network filesystems. +(Contributed by Giampaolo Rodola' in :issue:`33695`.) From 5715235a81310501cb16bd64b3ebc230e77dea08 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 18 Jul 2018 15:39:30 +0200 Subject: [PATCH 09/15] use os.path.islink() instead of is_symlink() in onerror() --- Lib/shutil.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 242298c50440d9..7c86301051f2f5 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,7 +10,6 @@ import fnmatch import collections import errno -import io try: import zlib @@ -549,7 +548,7 @@ def _rmtree_unsafe(path, onerror): # os.scandir or entry.is_dir above. raise OSError("Cannot call rmtree on a symbolic link") except OSError: - onerror(entry.is_symlink, fullname, sys.exc_info()) + onerror(os.path.islink, fullname, sys.exc_info()) continue _rmtree_unsafe(fullname, onerror) else: From dedf67ab6dd0f0ff7d27737c5ce8040378b325c3 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 18 Jul 2018 15:53:35 +0200 Subject: [PATCH 10/15] use os.listdir() to collect ignored file names --- Lib/shutil.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 7c86301051f2f5..303379a9ee1981 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -469,13 +469,7 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, # (will add later) entries = os.scandir(src) if ignore is not None: - ls = [] - names = set() - for entry in entries: - ls.append(entry) - names.add(entry.name) - ignored_names = ignore(src, names) - entries = ls + ignored_names = ignore(src, set(os.listdir(src))) else: ignored_names = set() From 559ab8a45093de847be057bfa936eec21f759b56 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 18 Jul 2018 16:00:29 +0200 Subject: [PATCH 11/15] avoid passing DirEntry in case of custom 'copy_function' --- Lib/shutil.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 303379a9ee1981..96e88ae4b13faa 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -475,11 +475,14 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, os.makedirs(dst) errors = [] + use_srcentry = copy_function is copy2 or copy_function is copy + for srcentry in entries: if srcentry.name in ignored_names: continue srcname = os.path.join(src, srcentry.name) dstname = os.path.join(dst, srcentry.name) + srcobj = srcentry if use_srcentry else srcname try: if srcentry.is_symlink(): linkto = os.readlink(srcname) @@ -488,19 +491,19 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, # code with a custom `copy_function` may rely on copytree # doing the right thing. os.symlink(linkto, dstname) - copystat(srcentry, dstname, follow_symlinks=not symlinks) + copystat(srcobj, dstname, follow_symlinks=not symlinks) else: # ignore dangling symlink if the flag is on if not os.path.exists(linkto) and ignore_dangling_symlinks: continue # otherwise let the copy occurs. copy2 will raise an error if srcentry.is_dir(): - copytree(srcentry, dstname, symlinks, ignore, + copytree(srcobj, dstname, symlinks, ignore, copy_function) else: - copy_function(srcentry, dstname) + copy_function(srcobj, dstname) elif srcentry.is_dir(): - copytree(srcentry, dstname, symlinks, ignore, copy_function) + copytree(srcobj, dstname, symlinks, ignore, copy_function) else: # Will raise a SpecialFileError for unsupported file types copy_function(srcentry, dstname) From 410377840ba56ff0d9b591efa3659811dace9608 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 18 Jul 2018 16:06:46 +0200 Subject: [PATCH 12/15] use a support _copytree function to avoid 1 level indentation --- Lib/shutil.py | 83 +++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 96e88ae4b13faa..3619d2492c373a 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -429,45 +429,8 @@ def _ignore_patterns(path, names): return set(ignored_names) return _ignore_patterns -def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, - ignore_dangling_symlinks=False): - """Recursively copy a directory tree. - - The destination directory must not already exist. - If exception(s) occur, an Error is raised with a list of reasons. - - If the optional symlinks flag is true, symbolic links in the - source tree result in symbolic links in the destination tree; if - it is false, the contents of the files pointed to by symbolic - links are copied. If the file pointed by the symlink doesn't - exist, an exception will be added in the list of errors raised in - an Error exception at the end of the copy process. - - You can set the optional ignore_dangling_symlinks flag to true if you - want to silence this exception. Notice that this has no effect on - platforms that don't support os.symlink. - - The optional ignore argument is a callable. If given, it - is called with the `src` parameter, which is the directory - being visited by copytree(), and `names` which is the list of - `src` contents, as returned by os.listdir(): - - callable(src, names) -> ignored_names - - Since copytree() is called recursively, the callable will be - called once for each directory that is copied. It returns a - list of names relative to the `src` directory that should - not be copied. - - The optional copy_function argument is a callable that will be used - to copy each file. It will be called with the source path and the - destination path as arguments. By default, copy2() is used, but any - function that supports the same signature (like copy()) can be used. - - """ - # TODO: use with statement; omitted just to make code review easier - # (will add later) - entries = os.scandir(src) +def _copytree(entries, src, dst, symlinks, ignore, copy_function, + ignore_dangling_symlinks): if ignore is not None: ignored_names = ignore(src, set(os.listdir(src))) else: @@ -523,6 +486,48 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, raise Error(errors) return dst +def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, + ignore_dangling_symlinks=False): + """Recursively copy a directory tree. + + The destination directory must not already exist. + If exception(s) occur, an Error is raised with a list of reasons. + + If the optional symlinks flag is true, symbolic links in the + source tree result in symbolic links in the destination tree; if + it is false, the contents of the files pointed to by symbolic + links are copied. If the file pointed by the symlink doesn't + exist, an exception will be added in the list of errors raised in + an Error exception at the end of the copy process. + + You can set the optional ignore_dangling_symlinks flag to true if you + want to silence this exception. Notice that this has no effect on + platforms that don't support os.symlink. + + The optional ignore argument is a callable. If given, it + is called with the `src` parameter, which is the directory + being visited by copytree(), and `names` which is the list of + `src` contents, as returned by os.listdir(): + + callable(src, names) -> ignored_names + + Since copytree() is called recursively, the callable will be + called once for each directory that is copied. It returns a + list of names relative to the `src` directory that should + not be copied. + + The optional copy_function argument is a callable that will be used + to copy each file. It will be called with the source path and the + destination path as arguments. By default, copy2() is used, but any + function that supports the same signature (like copy()) can be used. + + """ + with os.scandir(src) as entries: + return _copytree( + entries=entries, src=src, dst=dst, symlinks=symlinks, + ignore=ignore, copy_function=copy_function, + ignore_dangling_symlinks=ignore_dangling_symlinks) + # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): try: From 91fa2e26fb23122f7b1c0a20d48503b7ac136f81 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 18 Jul 2018 16:22:16 +0200 Subject: [PATCH 13/15] small refactoring --- Lib/shutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 3619d2492c373a..8e2605567a3c1f 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -352,8 +352,8 @@ def lookup(name): return fn return _nop - if follow and isinstance(src, os.DirEntry): - st = src.stat() + if isinstance(src, os.DirEntry): + st = src.stat(follow_symlinks=follow) else: st = lookup("stat")(src, follow_symlinks=follow) mode = stat.S_IMODE(st.st_mode) From 5712a0a02cbecd91c19cf446632911403b71aea5 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Wed, 18 Jul 2018 16:28:02 +0200 Subject: [PATCH 14/15] indentation --- Lib/shutil.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 8e2605567a3c1f..06ed4c95180d82 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -523,10 +523,9 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, """ with os.scandir(src) as entries: - return _copytree( - entries=entries, src=src, dst=dst, symlinks=symlinks, - ignore=ignore, copy_function=copy_function, - ignore_dangling_symlinks=ignore_dangling_symlinks) + return _copytree(entries=entries, src=src, dst=dst, symlinks=symlinks, + ignore=ignore, copy_function=copy_function, + ignore_dangling_symlinks=ignore_dangling_symlinks) # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): From f0806db070c75f7b0954c7d74ecf940311d26d19 Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Thu, 2 Aug 2018 02:42:13 +0200 Subject: [PATCH 15/15] be more explicit and do not globalize function availability via hasattr() (speedup is neglibigle anyway) --- Lib/shutil.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 06ed4c95180d82..d5210bd47bbf6c 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -52,10 +52,6 @@ COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 16 * 1024 _HAS_SENDFILE = posix and hasattr(os, "sendfile") _HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS -_HAS_SAMESTAT = hasattr(os.path, 'samestat') -_HAS_SAMEFILE = hasattr(os.path, 'samefile') -_HAS_LCHMOD = hasattr(os, 'lchmod') -_HAS_CHMOD = hasattr(os, 'chmod') __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", "copytree", "move", "rmtree", "Error", "SpecialFileError", @@ -204,13 +200,13 @@ def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE): def _samefile(src, dst): # Macintosh, Unix. - if isinstance(src, os.DirEntry) and _HAS_SAMESTAT: + if isinstance(src, os.DirEntry) and hasattr(os.path, 'samestat'): try: return os.path.samestat(src.stat(), os.stat(dst)) except OSError: return False - if _HAS_SAMEFILE: + if hasattr(os.path, 'samefile'): try: return os.path.samefile(src, dst) except OSError: @@ -288,11 +284,11 @@ def copymode(src, dst, *, follow_symlinks=True): """ if not follow_symlinks and _islink(src) and os.path.islink(dst): - if _HAS_LCHMOD: + if hasattr(os, 'lchmod'): stat_func, chmod_func = os.lstat, os.lchmod else: return - elif _HAS_CHMOD: + elif hasattr(os, 'chmod'): stat_func, chmod_func = _stat, os.chmod else: return