diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2b314b6c9a16bf..5bf1ec407d1824 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -87,6 +87,128 @@ def isabs(self, path): raise UnsupportedOperation(self._unsupported_msg('isabs()')) +class CopierBase: + """Base class for path copiers, which transfer files and directories from + one path object to another. + + A reference to this class is available as PathBase._copier. When + PathBase.copy() is called, it uses the copier type of the *target* path to + perform the copy; this allows writing of data and metadata to occur + together (or in a particular order) where supported or required by the + path type. + """ + __slots__ = ('follow_symlinks', 'dirs_exist_ok', 'preserve_metadata') + + def __init__(self, follow_symlinks=True, dirs_exist_ok=False, + preserve_metadata=False): + self.follow_symlinks = follow_symlinks + self.dirs_exist_ok = dirs_exist_ok + self.preserve_metadata = preserve_metadata + + @classmethod + def ensure_different_files(cls, source, target, dir_entry=None): + """Raise OSError(EINVAL) if both paths refer to the same file.""" + try: + target_st = target.stat() + try: + source_st = dir_entry.stat() + except AttributeError: + source_st = source.stat() + except (OSError, ValueError): + return + if source_st.st_ino != target_st.st_ino: + return # Different inode + if source_st.st_dev != target_st.st_dev: + return # Different device + err = OSError(EINVAL, "Source and target are the same file") + err.filename = str(source) + err.filename2 = str(target) + raise err + + @classmethod + def ensure_distinct_paths(cls, source, target): + """Raise OSError(EINVAL) if the target is within the source path.""" + # Note: there is no straightforward, foolproof algorithm to determine + # if one directory is within another (a particularly perverse example + # would be a single network share mounted in one location via NFS, and + # in another location via CIFS), so we simply check whether the + # 
target path is lexically equal to, or within, the source path. + if source == target: + err = OSError(EINVAL, "Source and target are the same path") + elif source in target.parents: + err = OSError(EINVAL, "Source path is a parent of target path") + else: + return + err.filename = str(source) + err.filename2 = str(target) + raise err + + def copy(self, source, target): + """Copy the given file or directory tree to the given target.""" + self.ensure_distinct_paths(source, target) + if self.preserve_metadata: + metadata_keys = source._readable_metadata & target._writable_metadata + else: + metadata_keys = None + if not self.follow_symlinks and source.is_symlink(): + self.copy_symlink(source, target, metadata_keys) + elif source.is_dir(): + self.copy_dir(source, target, metadata_keys) + else: + self.copy_file(source, target, metadata_keys) + + def copy_dir(self, source, target, metadata_keys, dir_entry=None): + """Copy the given directory to the given target.""" + if metadata_keys: + metadata = source._read_metadata(metadata_keys, dir_entry=dir_entry) + else: + metadata = None + with source.scandir() as entries: + target.mkdir(exist_ok=self.dirs_exist_ok) + for entry in entries: + src = source._join_dir_entry(entry) + dst = target.joinpath(entry.name) + if not self.follow_symlinks and entry.is_symlink(): + self.copy_symlink(src, dst, metadata_keys, entry) + elif entry.is_dir(): + self.copy_dir(src, dst, metadata_keys, entry) + else: + self.copy_file(src, dst, metadata_keys, entry) + if metadata: + target._write_metadata(metadata) + + def copy_file(self, source, target, metadata_keys, dir_entry=None): + """Copy the given file to the given target.""" + self.ensure_different_files(source, target, dir_entry) + if metadata_keys: + metadata = source._read_metadata(metadata_keys, dir_entry=dir_entry) + else: + metadata = None + with source.open('rb') as source_f: + try: + with target.open('wb') as target_f: + copyfileobj(source_f, target_f) + except IsADirectoryError as e: + if not 
target.exists(): + # Raise a less confusing exception. + raise FileNotFoundError( + f'Directory does not exist: {target}') from e + raise + if metadata: + target._write_metadata(metadata) + + def copy_symlink(self, source, target, metadata_keys, dir_entry=None): + """Copy the given symlink to the given target.""" + if metadata_keys: + metadata = source._read_metadata( + metadata_keys, follow_symlinks=False, dir_entry=dir_entry) + else: + metadata = None + target.symlink_to(source.readlink()) + if metadata: + target._write_metadata(metadata, follow_symlinks=False) + + class PathGlobber(_GlobberBase): """ Class providing shell-style globbing for path objects. @@ -426,6 +548,9 @@ class PathBase(PurePathBase): # Maximum number of symlinks to follow in resolve() _max_symlinks = 40 + _copier = CopierBase + _readable_metadata = frozenset() + _writable_metadata = frozenset() @classmethod def _unsupported_msg(cls, attribute): @@ -558,39 +683,6 @@ def samefile(self, other_path): return (st.st_ino == other_st.st_ino and st.st_dev == other_st.st_dev) - def _ensure_different_file(self, other_path): - """ - Raise OSError(EINVAL) if both paths refer to the same file. - """ - try: - if not self.samefile(other_path): - return - except (OSError, ValueError): - return - err = OSError(EINVAL, "Source and target are the same file") - err.filename = str(self) - err.filename2 = str(other_path) - raise err - - def _ensure_distinct_path(self, other_path): - """ - Raise OSError(EINVAL) if the other path is within this path. - """ - # Note: there is no straightforward, foolproof algorithm to determine - # if one directory is within another (a particularly perverse example - # would be a single network share mounted in one location via NFS, and - # in another location via CIFS), so we simply checks whether the - # other path is lexically equal to, or within, this path. 
- if self == other_path: - err = OSError(EINVAL, "Source and target are the same path") - elif self in other_path.parents: - err = OSError(EINVAL, "Source path is a parent of target path") - else: - return - err.filename = str(self) - err.filename2 = str(other_path) - raise err - def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ @@ -632,6 +724,12 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) + def _join_dir_entry(self, dir_entry): + """Construct a new path object from the given os.DirEntry-like object, + which should have been generated by calling scandir() on this path. + """ + return self.joinpath(dir_entry.name) + def scandir(self): """Yield os.DirEntry objects of the directory contents. @@ -647,8 +745,8 @@ def iterdir(self): special entries '.' and '..' are not included. """ with self.scandir() as entries: - names = [entry.name for entry in entries] - return map(self.joinpath, names) + entries = list(entries) + return map(self._join_dir_entry, entries) def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: @@ -704,7 +802,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): try: if entry.is_dir(follow_symlinks=follow_symlinks): if not top_down: - paths.append(path.joinpath(name)) + paths.append(path._join_dir_entry(entry)) dirnames.append(name) else: filenames.append(name) @@ -790,13 +888,6 @@ def symlink_to(self, target, target_is_directory=False): """ raise UnsupportedOperation(self._unsupported_msg('symlink_to()')) - def _symlink_to_target_of(self, link): - """ - Make this path a symlink with the same target as the given link. This - is used by copy(). - """ - self.symlink_to(link.readlink()) - def hardlink_to(self, target): """ Make this path a hard link pointing to the same file as *target*. 
@@ -817,10 +908,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ raise UnsupportedOperation(self._unsupported_msg('mkdir()')) - # Metadata keys supported by this path type. - _readable_metadata = _writable_metadata = frozenset() - - def _read_metadata(self, keys=None, *, follow_symlinks=True): + def _read_metadata(self, metadata_keys, *, follow_symlinks=True, dir_entry=None): """ Returns path metadata as a dict with string keys. """ @@ -832,33 +920,6 @@ def _write_metadata(self, metadata, *, follow_symlinks=True): """ raise UnsupportedOperation(self._unsupported_msg('_write_metadata()')) - def _copy_metadata(self, target, *, follow_symlinks=True): - """ - Copies metadata (permissions, timestamps, etc) from this path to target. - """ - # Metadata types supported by both source and target. - keys = self._readable_metadata & target._writable_metadata - if keys: - metadata = self._read_metadata(keys, follow_symlinks=follow_symlinks) - target._write_metadata(metadata, follow_symlinks=follow_symlinks) - - def _copy_file(self, target): - """ - Copy the contents of this file to the given target. - """ - self._ensure_different_file(target) - with self.open('rb') as source_f: - try: - with target.open('wb') as target_f: - copyfileobj(source_f, target_f) - except IsADirectoryError as e: - if not target.exists(): - # Raise a less confusing exception. 
- raise FileNotFoundError( - f'Directory does not exist: {target}') from e - else: - raise - def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False, preserve_metadata=False): """ @@ -866,25 +927,8 @@ def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False, """ if not isinstance(target, PathBase): target = self.with_segments(target) - self._ensure_distinct_path(target) - stack = [(self, target)] - while stack: - src, dst = stack.pop() - if not follow_symlinks and src.is_symlink(): - dst._symlink_to_target_of(src) - if preserve_metadata: - src._copy_metadata(dst, follow_symlinks=False) - elif src.is_dir(): - children = src.iterdir() - dst.mkdir(exist_ok=dirs_exist_ok) - stack.extend((child, dst.joinpath(child.name)) - for child in children) - if preserve_metadata: - src._copy_metadata(dst) - else: - src._copy_file(dst) - if preserve_metadata: - src._copy_metadata(dst) + copier = target._copier(follow_symlinks, dirs_exist_ok, preserve_metadata) + copier.copy(self, target) return target def copy_into(self, target_dir, *, follow_symlinks=True, @@ -931,7 +975,7 @@ def move(self, target): """ Recursively move this file or directory tree to the given destination. 
""" - self._ensure_different_file(target) + target._copier.ensure_different_files(self, target) try: return self.replace(target) except UnsupportedOperation: diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b5d9dc49f58463..b9fb988641eeb1 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -19,7 +19,7 @@ from pathlib._os import (copyfile, file_metadata_keys, read_file_metadata, write_file_metadata) -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase, CopierBase __all__ = [ @@ -57,6 +57,39 @@ def __repr__(self): return "<{}.parents>".format(type(self._path).__name__) +class _Copier(CopierBase): + """Copier class that uses fast OS copy routine where possible, and ensures + symlinks' target_is_directory argument is properly set on Windows. + """ + __slots__ = () + + if copyfile: + def copy_file(self, source, target, metadata_keys, dir_entry=None): + """Copy the given file to the given target.""" + try: + source = os.fspath(source) + except TypeError: + if not isinstance(source, PathBase): + raise + CopierBase.copy_file(self, source, target, metadata_keys, dir_entry) + else: + copyfile(source, os.fspath(target)) + + if os.name == 'nt': + def copy_symlink(self, source, target, metadata_keys, dir_entry=None): + """Copy the given symlink to the given target.""" + if metadata_keys: + metadata = source._read_metadata( + metadata_keys, follow_symlinks=False, dir_entry=dir_entry) + else: + metadata = None + symlink_target = source.readlink() + symlink_is_directory = (dir_entry or source).is_dir() + target.symlink_to(symlink_target, symlink_is_directory) + if metadata: + target._write_metadata(metadata, follow_symlinks=False) + + class PurePath(PurePathBase): """Base class for manipulating paths without I/O. @@ -524,6 +557,11 @@ class Path(PathBase, PurePath): but cannot instantiate a WindowsPath on a POSIX system or vice versa. 
""" __slots__ = () + _copier = _Copier + _readable_metadata = file_metadata_keys + _writable_metadata = file_metadata_keys + _read_metadata = read_file_metadata + _write_metadata = write_file_metadata as_uri = PurePath.as_uri @classmethod @@ -634,6 +672,12 @@ def _filter_trailing_slash(self, paths): path_str = path_str[:-1] yield path_str + def _join_dir_entry(self, dir_entry): + path_str = dir_entry.name if str(self) == '.' else dir_entry.path + path = self.with_segments(path_str) + path._str = path_str + return path + def scandir(self): """Yield os.DirEntry objects of the directory contents. @@ -642,19 +686,6 @@ def scandir(self): """ return os.scandir(self) - def iterdir(self): - """Yield path objects of the directory contents. - - The children are yielded in arbitrary order, and the - special entries '.' and '..' are not included. - """ - root_dir = str(self) - with os.scandir(root_dir) as scandir_it: - paths = [entry.path for entry in scandir_it] - if root_dir == '.': - paths = map(self._remove_leading_dot, paths) - return map(self._from_parsed_string, paths) - def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. @@ -804,24 +835,6 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): if not exist_ok or not self.is_dir(): raise - _readable_metadata = _writable_metadata = file_metadata_keys - _read_metadata = read_file_metadata - _write_metadata = write_file_metadata - - if copyfile: - def _copy_file(self, target): - """ - Copy the contents of this file to the given target. - """ - try: - target = os.fspath(target) - except TypeError: - if not isinstance(target, PathBase): - raise - PathBase._copy_file(self, target) - else: - copyfile(os.fspath(self), target) - def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). 
@@ -884,14 +897,6 @@ def symlink_to(self, target, target_is_directory=False): """ os.symlink(target, self, target_is_directory) - if os.name == 'nt': - def _symlink_to_target_of(self, link): - """ - Make this path a symlink with the same target as the given link. - This is used by copy(). - """ - self.symlink_to(link.readlink(), link.is_dir()) - if hasattr(os, "link"): def hardlink_to(self, target): """ diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index 642b3a57c59a1d..bf03d574780871 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -174,40 +174,34 @@ def copyfileobj(source_f, target_f): file_metadata_keys = frozenset(file_metadata_keys) -def read_file_metadata(path, keys=None, *, follow_symlinks=True): +def read_file_metadata(path, metadata_keys, *, follow_symlinks=True, dir_entry=None): """ Returns local path metadata as a dict with string keys. """ - if keys is None: - keys = file_metadata_keys - assert keys.issubset(file_metadata_keys) - result = {} - for key in keys: - if key == 'xattrs': - try: - result['xattrs'] = [ - (attr, os.getxattr(path, attr, follow_symlinks=follow_symlinks)) - for attr in os.listxattr(path, follow_symlinks=follow_symlinks)] - except OSError as err: - if err.errno not in (EPERM, ENOTSUP, ENODATA, EINVAL, EACCES): - raise - continue - st = os.stat(path, follow_symlinks=follow_symlinks) - if key == 'mode': - result['mode'] = stat.S_IMODE(st.st_mode) - elif key == 'times_ns': - result['times_ns'] = st.st_atime_ns, st.st_mtime_ns - elif key == 'flags': - result['flags'] = st.st_flags - return result + metadata = {} + if 'mode' in metadata_keys or 'times_ns' in metadata_keys or 'flags' in metadata_keys: + st = (dir_entry or path).stat(follow_symlinks=follow_symlinks) + if 'mode' in metadata_keys: + metadata['mode'] = stat.S_IMODE(st.st_mode) + if 'times_ns' in metadata_keys: + metadata['times_ns'] = st.st_atime_ns, st.st_mtime_ns + if 'flags' in metadata_keys: + metadata['flags'] = st.st_flags + if 'xattrs' in 
metadata_keys: + try: + metadata['xattrs'] = [ + (attr, os.getxattr(path, attr, follow_symlinks=follow_symlinks)) + for attr in os.listxattr(path, follow_symlinks=follow_symlinks)] + except OSError as err: + if err.errno not in (EPERM, ENOTSUP, ENODATA, EINVAL, EACCES): + raise + return metadata def write_file_metadata(path, metadata, *, follow_symlinks=True): """ Sets local path metadata from the given dict with string keys. """ - assert frozenset(metadata.keys()).issubset(file_metadata_keys) - def _nop(*args, ns=None, follow_symlinks=None): pass diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2c48eeeda145d0..c8507e1f186994 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -784,8 +784,9 @@ def test_copy_dir_preserve_metadata(self): for subpath in ['.', 'fileC', 'dirD', 'dirD/fileD']: source_st = source.joinpath(subpath).stat() target_st = target.joinpath(subpath).stat() - self.assertLessEqual(source_st.st_atime, target_st.st_atime) - self.assertLessEqual(source_st.st_mtime, target_st.st_mtime) + # The modification times may be truncated in the new file. + self.assertLessEqual(source_st.st_atime, target_st.st_atime + 1) + self.assertLessEqual(source_st.st_mtime, target_st.st_mtime + 1) self.assertEqual(source_st.st_mode, target_st.st_mode) if hasattr(source_st, 'st_flags'): self.assertEqual(source_st.st_flags, target_st.st_flags) diff --git a/Misc/NEWS.d/next/Library/2024-11-01-04-21-26.gh-issue-125413.Z-jjZq.rst b/Misc/NEWS.d/next/Library/2024-11-01-04-21-26.gh-issue-125413.Z-jjZq.rst new file mode 100644 index 00000000000000..e148e6d33ca112 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-01-04-21-26.gh-issue-125413.Z-jjZq.rst @@ -0,0 +1,2 @@ +Speed up :meth:`Path.copy <pathlib.Path.copy>` by making use of +:meth:`~pathlib.Path.scandir` internally.