123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693 |
- """scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib
- scandir() is a generator version of os.listdir() that returns an
- iterator over files in a directory, and also exposes the extra
- information most OSes provide while iterating files in a directory
- (such as type and stat information).
- This module also includes a version of os.walk() that uses scandir()
- to speed it up significantly.
- See README.md or https://github.com/benhoyt/scandir for rationale and
- docs, or read PEP 471 (https://www.python.org/dev/peps/pep-0471/) for
- more details on its inclusion into Python 3.5
- scandir is released under the new BSD 3-clause license. See
- LICENSE.txt for the full license text.
- """
- from __future__ import division
- from errno import ENOENT
- from os import listdir, lstat, stat, strerror
- from os.path import join, islink
- from stat import S_IFDIR, S_IFLNK, S_IFREG
- import collections
- import sys
- try:
- import _scandir
- except ImportError:
- _scandir = None
- try:
- import ctypes
- except ImportError:
- ctypes = None
- if _scandir is None and ctypes is None:
- import warnings
- warnings.warn("scandir can't find the compiled _scandir C module "
- "or ctypes, using slow generic fallback")
- __version__ = '1.10.0'
- __all__ = ['scandir', 'walk']
# Windows FILE_ATTRIBUTE_* bit flags, used to interpret the
# dwFileAttributes member of a FIND_DATA structure. Written in hex so
# that each single-bit value is easy to recognise.
FILE_ATTRIBUTE_ARCHIVE = 0x20
FILE_ATTRIBUTE_COMPRESSED = 0x800
FILE_ATTRIBUTE_DEVICE = 0x40
FILE_ATTRIBUTE_DIRECTORY = 0x10
FILE_ATTRIBUTE_ENCRYPTED = 0x4000
FILE_ATTRIBUTE_HIDDEN = 0x2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 0x8000
FILE_ATTRIBUTE_NORMAL = 0x80
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 0x2000
FILE_ATTRIBUTE_NO_SCRUB_DATA = 0x20000
FILE_ATTRIBUTE_OFFLINE = 0x1000
FILE_ATTRIBUTE_READONLY = 0x1
FILE_ATTRIBUTE_REPARSE_POINT = 0x400
FILE_ATTRIBUTE_SPARSE_FILE = 0x200
FILE_ATTRIBUTE_SYSTEM = 0x4
FILE_ATTRIBUTE_TEMPORARY = 0x100
FILE_ATTRIBUTE_VIRTUAL = 0x10000

# True when running on any Python 3.x interpreter.
IS_PY3 = sys.version_info[0] >= 3

if IS_PY3:
    # Python <= 3.2 has no u'unicode' literal syntax, so the module calls
    # unicode('...') instead and aliases it to str on Python 3.
    unicode = str
class GenericDirEntry(object):
    """Directory entry used by the portable ``os.listdir()`` fallback.

    Only the directory path and the entry name are known when the entry
    is created, so every type query goes through ``stat()``/``lstat()``.
    Each stat result is cached on the instance after the first
    successful call.
    """

    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self._scandir_path = scandir_path
        self.name = name
        self._stat = None
        self._lstat = None
        self._path = None

    @property
    def path(self):
        """Directory path joined with the entry name (computed once)."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def stat(self, follow_symlinks=True):
        """Return the cached stat result, following symlinks if requested."""
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat
        if self._stat is None:
            self._stat = stat(self.path)
        return self._stat

    # The three type checks below repeat the same pattern on purpose:
    # sharing a helper would cost an extra attribute lookup and method
    # call, which are relatively slow on CPython.

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory; False if it is missing."""
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except OSError as err:
            if err.errno == ENOENT:
                # Path doesn't exist or is a broken symlink
                return False
            raise
        else:
            return info.st_mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file; False if missing."""
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except OSError as err:
            if err.errno == ENOENT:
                # Path doesn't exist or is a broken symlink
                return False
            raise
        else:
            return info.st_mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        try:
            info = self.stat(follow_symlinks=False)
        except OSError as err:
            if err.errno == ENOENT:
                # Path doesn't exist
                return False
            raise
        else:
            return info.st_mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return the inode number from the entry's lstat() result."""
        return self.stat(follow_symlinks=False).st_ino

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
def _scandir_generic(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.

    Each name returned by ``listdir(path)`` is wrapped in a
    GenericDirEntry that remembers *path* as its parent directory.
    """
    for entry_name in listdir(path):
        yield GenericDirEntry(path, entry_name)
if not IS_PY3 or sys.platform != 'win32':
    # Everywhere except Python 3 on Windows the plain generator is used
    # directly.
    scandir_generic = _scandir_generic
else:
    def scandir_generic(path=unicode('.')):
        # Python 3 on Windows: reject bytes paths before creating the
        # generator, so the TypeError is raised at call time.
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_generic(path)
    scandir_generic.__doc__ = _scandir_generic.__doc__
# Filled in further down when the corresponding implementation exists.
scandir_c = None
scandir_python = None

if sys.platform == 'win32':
    if ctypes is not None:
        from ctypes import wintypes

        # Various constants from windows.h
        INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
        ERROR_FILE_NOT_FOUND = 2
        ERROR_NO_MORE_FILES = 18
        IO_REPARSE_TAG_SYMLINK = 0xA000000C

        # Number of seconds between 1601-01-01 and 1970-01-01
        SECONDS_BETWEEN_EPOCHS = 11644473600

        kernel32 = ctypes.windll.kernel32

        # ctypes wrappers for (wide string versions of) FindFirstFile,
        # FindNextFile, and FindClose
        FindFirstFile = kernel32.FindFirstFileW
        FindFirstFile.restype = wintypes.HANDLE
        FindFirstFile.argtypes = [
            wintypes.LPCWSTR,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]

        FindNextFile = kernel32.FindNextFileW
        FindNextFile.restype = wintypes.BOOL
        FindNextFile.argtypes = [
            wintypes.HANDLE,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]

        FindClose = kernel32.FindClose
        FindClose.restype = wintypes.BOOL
        FindClose.argtypes = [wintypes.HANDLE]

        # Stand-in for os.stat_result, built from FIND_DATA values.
        Win32StatResult = collections.namedtuple('Win32StatResult', [
            'st_mode', 'st_ino', 'st_dev', 'st_nlink', 'st_uid', 'st_gid',
            'st_size',
            'st_atime', 'st_mtime', 'st_ctime',
            'st_atime_ns', 'st_mtime_ns', 'st_ctime_ns',
            'st_file_attributes',
        ])
def filetime_to_time(filetime):
    """Convert Win32 FILETIME to time since Unix epoch in seconds.

    The two 32-bit halves are combined into one tick count, scaled by
    10000000 ticks per second, and shifted by SECONDS_BETWEEN_EPOCHS.
    """
    ticks = (filetime.dwHighDateTime << 32) | filetime.dwLowDateTime
    seconds_since_1601 = ticks / 10000000
    return seconds_since_1601 - SECONDS_BETWEEN_EPOCHS
def _filetime_to_ns(filetime):
    """Convert Win32 FILETIME to integer nanoseconds since the Unix epoch.

    Works purely on integers (one FILETIME tick is 100 ns), so the result
    keeps the full tick resolution instead of going through a float.
    """
    ticks = (filetime.dwHighDateTime << 32) | filetime.dwLowDateTime
    return (ticks - SECONDS_BETWEEN_EPOCHS * 10000000) * 100


def find_data_to_stat(data):
    """Convert Win32 FIND_DATA struct to stat_result.

    *data* is read for dwFileAttributes, dwReserved0, nFileSizeHigh,
    nFileSizeLow and the three FILETIME members. Returns a
    Win32StatResult whose st_ino, st_dev, st_nlink, st_uid and st_gid
    are zero.
    """
    # First convert Win32 dwFileAttributes to st_mode
    attributes = data.dwFileAttributes
    if attributes & FILE_ATTRIBUTE_DIRECTORY:
        st_mode = S_IFDIR | 0o111
    else:
        st_mode = S_IFREG
    if attributes & FILE_ATTRIBUTE_READONLY:
        st_mode |= 0o444
    else:
        st_mode |= 0o666
    if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
            data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
        # Replace the file-type bits with S_IFLNK, keep the permissions
        st_mode = (st_mode & ~0o170000) | S_IFLNK

    st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
    st_atime = filetime_to_time(data.ftLastAccessTime)
    st_mtime = filetime_to_time(data.ftLastWriteTime)
    st_ctime = filetime_to_time(data.ftCreationTime)

    # The *_ns fields come straight from the integer tick counts:
    # multiplying the float seconds by 1e9 would round away part of the
    # 100 ns FILETIME resolution.
    st_atime_ns = _filetime_to_ns(data.ftLastAccessTime)
    st_mtime_ns = _filetime_to_ns(data.ftLastWriteTime)
    st_ctime_ns = _filetime_to_ns(data.ftCreationTime)

    # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
    # st_nlink, st_uid, st_gid
    return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
                           st_atime, st_mtime, st_ctime,
                           st_atime_ns, st_mtime_ns, st_ctime_ns,
                           attributes)
class Win32DirEntryPython(object):
    """Directory entry backed by a Win32 FIND_DATA structure.

    Type checks for non-symlink entries are answered from the find data
    alone; a real ``stat()`` call is only made when a symlink has to be
    followed, and ``inode()`` always uses ``lstat()``.
    """

    __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path', '_inode')

    def __init__(self, scandir_path, name, find_data):
        self._scandir_path = scandir_path
        self.name = name
        self._stat = None
        self._lstat = None
        self._find_data = find_data
        self._path = None
        self._inode = None

    @property
    def path(self):
        """Directory path joined with the entry name (computed once)."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def stat(self, follow_symlinks=True):
        """Return the cached stat result, following symlinks if requested."""
        if not follow_symlinks:
            if self._lstat is None:
                # Lazily convert to stat object, because it's slow
                # in Python, and often we only need is_dir() etc
                self._lstat = find_data_to_stat(self._find_data)
            return self._lstat

        if self._stat is None:
            if self.is_symlink():
                # It's a symlink, call link-following stat()
                self._stat = stat(self.path)
            else:
                # Not a symlink, stat is same as lstat value
                self._stat = self.stat(follow_symlinks=False)
        return self._stat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry (or its symlink target) is a directory."""
        if not self.is_symlink():
            return (self._find_data.dwFileAttributes &
                    FILE_ATTRIBUTE_DIRECTORY != 0)
        if not follow_symlinks:
            return False
        try:
            return self.stat().st_mode & 0o170000 == S_IFDIR
        except OSError as err:
            if err.errno != ENOENT:
                raise
            return False

    def is_file(self, follow_symlinks=True):
        """Return True if the entry (or its symlink target) is a file."""
        if not self.is_symlink():
            return (self._find_data.dwFileAttributes &
                    FILE_ATTRIBUTE_DIRECTORY == 0)
        if not follow_symlinks:
            return False
        try:
            return self.stat().st_mode & 0o170000 == S_IFREG
        except OSError as err:
            if err.errno != ENOENT:
                raise
            return False

    def is_symlink(self):
        """Return True if the find data marks a symlink reparse point."""
        find_data = self._find_data
        if find_data.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT == 0:
            return False
        return find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK

    def inode(self):
        """Return the inode number from lstat(), cached after first use."""
        if self._inode is None:
            self._inode = lstat(self.path).st_ino
        return self._inode

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
def win_error(error, filename):
    """Build (but do not raise) a WindowsError for Win32 code *error*.

    The message comes from ctypes.FormatError(error) and *filename* is
    attached as the exception's ``filename`` attribute.
    """
    failure = WindowsError(error, ctypes.FormatError(error))
    failure.filename = filename
    return failure
def _scandir_python(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.

    Uses FindFirstFile/FindNextFile; a bytes *path* is decoded with
    'mbcs' for the search and the yielded names are encoded back.
    """
    # Build the wildcard pattern and call FindFirstFile
    is_bytes = isinstance(path, bytes)
    if is_bytes:
        pattern = join(path.decode('mbcs', 'strict'), '*.*')
    else:
        pattern = join(path, '*.*')

    find_data = wintypes.WIN32_FIND_DATAW()
    handle = FindFirstFile(pattern, ctypes.byref(find_data))
    if handle == INVALID_HANDLE_VALUE:
        error = ctypes.GetLastError()
        if error != ERROR_FILE_NOT_FOUND:
            raise win_error(error, path)
        # No files, don't yield anything
        return

    # Call FindNextFile in a loop, stopping when no more files
    try:
        while True:
            # Skip '.' and '..' (current and parent directory)
            entry_name = find_data.cFileName
            if entry_name not in ('.', '..'):
                if is_bytes:
                    entry_name = entry_name.encode('mbcs', 'replace')
                yield Win32DirEntryPython(path, entry_name, find_data)

            # A fresh struct per entry: the yielded entry keeps a
            # reference to the previous one.
            find_data = wintypes.WIN32_FIND_DATAW()
            if FindNextFile(handle, ctypes.byref(find_data)):
                continue
            error = ctypes.GetLastError()
            if error == ERROR_NO_MORE_FILES:
                break
            raise win_error(error, path)
    finally:
        if not FindClose(handle):
            raise win_error(ctypes.GetLastError(), path)
if not IS_PY3:
    # Python 2 accepts bytes paths, so the generator is used directly.
    scandir_python = _scandir_python
else:
    def scandir_python(path=unicode('.')):
        # Reject bytes paths before creating the generator, so the
        # TypeError is raised at call time.
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_python(path)
    scandir_python.__doc__ = _scandir_python.__doc__
# Choose the Windows implementation: compiled extension first, then the
# ctypes version, then the generic listdir()-based fallback.
if _scandir is not None:
    scandir_c = _scandir.scandir
    DirEntry_c = _scandir.DirEntry
    scandir = scandir_c
    DirEntry = DirEntry_c
elif ctypes is not None:
    scandir = scandir_python
    DirEntry = Win32DirEntryPython
else:
    scandir = scandir_generic
    DirEntry = GenericDirEntry
- # Linux, OS X, and BSD implementation
- elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform:
# The ctypes implementation is only set up when the platform is not
# 'sunos5' (this flag) and ctypes could be imported.
have_dirent_d_type = sys.platform != 'sunos5'

if have_dirent_d_type and ctypes is not None:
    import ctypes.util

    # Opaque DIR* handle passed between opendir/readdir_r/closedir
    DIR_p = ctypes.c_void_p
# Rather annoying how the dirent struct is slightly different on each
# platform. The only fields we care about are d_name and d_type.
class Dirent(ctypes.Structure):
    """ctypes layout of the C ``struct dirent`` filled in by readdir_r().

    The field list is chosen once, at class creation, from sys.platform:
    a Linux layout, an OpenBSD layout, and a default layout for every
    other platform that reaches this code.
    """

    if sys.platform.startswith('linux'):
        _fields_ = (
            ('d_ino', ctypes.c_ulong),
            ('d_off', ctypes.c_long),
            ('d_reclen', ctypes.c_ushort),
            ('d_type', ctypes.c_byte),
            ('d_name', ctypes.c_char * 256),
        )
    elif 'openbsd' in sys.platform:
        _fields_ = (
            ('d_ino', ctypes.c_uint64),
            ('d_off', ctypes.c_uint64),
            ('d_reclen', ctypes.c_uint16),
            ('d_type', ctypes.c_uint8),
            ('d_namlen', ctypes.c_uint8),
            ('__d_padding', ctypes.c_uint8 * 4),
            ('d_name', ctypes.c_char * 256),
        )
    else:
        _fields_ = (
            ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
            ('d_reclen', ctypes.c_ushort),
            ('d_type', ctypes.c_byte),
            ('d_namlen', ctypes.c_byte),
            ('d_name', ctypes.c_char * 256),
        )
# d_type values compared against Dirent.d_type
DT_UNKNOWN = 0
DT_DIR = 4
DT_REG = 8
DT_LNK = 10

Dirent_p = ctypes.POINTER(Dirent)
Dirent_pp = ctypes.POINTER(Dirent_p)

# use_errno=True lets ctypes.get_errno() report errno after these calls
libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)

opendir = libc.opendir
opendir.restype = DIR_p
opendir.argtypes = [ctypes.c_char_p]

readdir_r = libc.readdir_r
readdir_r.restype = ctypes.c_int
readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]

closedir = libc.closedir
closedir.restype = ctypes.c_int
closedir.argtypes = [DIR_p]

# Encoding used to convert between str paths/names and the bytes libc uses
file_system_encoding = sys.getfilesystemencoding()
class PosixDirEntry(object):
    """Directory entry built from a ``struct dirent`` record.

    The entry type and inode number come from the directory listing
    itself, so ``stat()``/``lstat()`` is only called when the type is
    DT_UNKNOWN, when a symlink has to be followed, or when the caller
    asks for the stat result.
    """

    __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode')

    def __init__(self, scandir_path, name, d_type, inode):
        self._scandir_path = scandir_path
        self.name = name
        self._d_type = d_type
        self._inode = inode
        self._stat = None
        self._lstat = None
        self._path = None

    @property
    def path(self):
        """Directory path joined with the entry name (computed once)."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def stat(self, follow_symlinks=True):
        """Return the cached stat result, following symlinks if requested."""
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

        if self._stat is None:
            if self.is_symlink():
                self._stat = stat(self.path)
            else:
                # Not a symlink: the lstat() result serves as stat() too
                if self._lstat is None:
                    self._lstat = lstat(self.path)
                self._stat = self._lstat
        return self._stat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry (or its symlink target) is a directory."""
        must_stat = (self._d_type == DT_UNKNOWN or
                     (follow_symlinks and self.is_symlink()))
        if not must_stat:
            return self._d_type == DT_DIR
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except OSError as err:
            if err.errno != ENOENT:
                raise
            return False
        return info.st_mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry (or its symlink target) is a file."""
        must_stat = (self._d_type == DT_UNKNOWN or
                     (follow_symlinks and self.is_symlink()))
        if not must_stat:
            return self._d_type == DT_REG
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except OSError as err:
            if err.errno != ENOENT:
                raise
            return False
        return info.st_mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        if self._d_type != DT_UNKNOWN:
            return self._d_type == DT_LNK
        try:
            info = self.stat(follow_symlinks=False)
        except OSError as err:
            if err.errno != ENOENT:
                raise
            return False
        return info.st_mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return the inode number recorded when the entry was created."""
        return self._inode

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
def posix_error(filename):
    """Build (but do not raise) an OSError from ctypes' saved errno.

    The error number is read with ctypes.get_errno(), its message comes
    from os.strerror(), and *filename* is attached as ``filename``.
    """
    code = ctypes.get_errno()
    failure = OSError(code, strerror(code))
    failure.filename = filename
    return failure
def scandir_python(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.

    *path* may be bytes or text. A bytes path is passed to opendir()
    unchanged and yields bytes names; a text path is encoded with the
    file system encoding and yields text names.

    Raises OSError (with ``filename`` set to *path*) if opendir(),
    readdir_r() or closedir() fails.
    """
    # On Python 3, undecodable bytes in file names must round-trip via
    # surrogateescape (PEP 383), as os.listdir() does. Python 2 has no
    # such error handler, so it keeps strict conversion.
    codec_errors = 'surrogateescape' if IS_PY3 else 'strict'

    if isinstance(path, bytes):
        opendir_path = path
        is_bytes = True
    else:
        opendir_path = path.encode(file_system_encoding, codec_errors)
        is_bytes = False

    dir_p = opendir(opendir_path)
    if not dir_p:
        raise posix_error(path)
    try:
        result = Dirent_p()
        while True:
            entry = Dirent()
            error_number = readdir_r(dir_p, entry, result)
            if error_number:
                # readdir_r() returns the error number itself and is not
                # required to set errno, so build the OSError from the
                # return value rather than from ctypes.get_errno().
                exc = OSError(error_number, strerror(error_number))
                exc.filename = path
                raise exc
            if not result:
                # NULL result pointer: end of directory
                break
            name = entry.d_name
            if name not in (b'.', b'..'):
                if not is_bytes:
                    name = name.decode(file_system_encoding, codec_errors)
                yield PosixDirEntry(path, name, entry.d_type, entry.d_ino)
    finally:
        if closedir(dir_p):
            raise posix_error(path)
# Choose the POSIX implementation: compiled extension first, then the
# ctypes version (when it was set up), then the generic fallback.
if _scandir is not None:
    scandir_c = _scandir.scandir
    DirEntry_c = _scandir.DirEntry
    scandir = scandir_c
    DirEntry = DirEntry_c
elif have_dirent_d_type and ctypes is not None:
    scandir = scandir_python
    DirEntry = PosixDirEntry
else:
    scandir = scandir_generic
    DirEntry = GenericDirEntry
- # Some other system -- no d_type or stat information
- else:
# Only the listdir()-based implementation is available on this platform.
scandir, DirEntry = scandir_generic, GenericDirEntry
def _walk(top, topdown=True, onerror=None, followlinks=False):
    """Like Python 3.5's implementation of os.walk() -- faster than
    the pre-Python 3.5 version as it uses scandir() internally.

    Yields ``(top, dirs, nondirs)`` for *top* and for each directory
    walked below it. With *topdown* true a directory is yielded before
    its sub-directories, otherwise after them. An OSError raised while
    listing a directory is passed to *onerror* (if given) and that
    directory is abandoned. Symlinks to directories are only descended
    into when *followlinks* is true.
    """
    dirs = []
    nondirs = []

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains. os.walk always
    # suppressed the exception then, rather than blow up for a minor
    # reason when (say) a thousand readable directories are still left
    # to visit. That logic is copied here.
    try:
        scandir_it = scandir(top)
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    while True:
        try:
            entry = next(scandir_it)
        except StopIteration:
            break
        except OSError as error:
            if onerror is not None:
                onerror(error)
            return

        try:
            is_dir = entry.is_dir()
        except OSError:
            # If is_dir() raises an OSError, treat the entry as not a
            # directory, the same behaviour as os.path.isdir().
            is_dir = False

        if is_dir:
            dirs.append(entry.name)
        else:
            nondirs.append(entry.name)

        if topdown or not is_dir:
            continue

        # Bottom-up: recurse into sub-directory, but exclude symlinks to
        # directories if followlinks is False
        if followlinks:
            walk_into = True
        else:
            try:
                walk_into = not entry.is_symlink()
            except OSError:
                # If is_symlink() raises an OSError, treat the entry as
                # not a symbolic link, the same behaviour as
                # os.path.islink().
                walk_into = True

        if walk_into:
            for item in walk(entry.path, topdown, onerror, followlinks):
                yield item

    if not topdown:
        # Yield after recursion if going bottom up
        yield top, dirs, nondirs
        return

    # Yield before recursion if going top down
    yield top, dirs, nondirs

    # Recurse into sub-directories
    for name in dirs:
        new_path = join(top, name)
        # Issue #23605: os.path.islink() is used instead of caching
        # entry.is_symlink() result during the loop on os.scandir() because
        # the caller can replace the directory entry during the "yield"
        # above.
        if followlinks or not islink(new_path):
            for item in walk(new_path, topdown, onerror, followlinks):
                yield item
if sys.platform == 'win32' and not IS_PY3:
    # Fix for broken unicode handling on Windows on Python 2.x, see:
    # https://github.com/benhoyt/scandir/issues/54
    file_system_encoding = sys.getfilesystemencoding()

    def walk(top, topdown=True, onerror=None, followlinks=False):
        # Decode a bytes top so the whole walk runs on unicode paths.
        if isinstance(top, bytes):
            top = top.decode(file_system_encoding)
        return _walk(top, topdown, onerror, followlinks)
else:
    walk = _walk
|