__init__.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. import io
  2. import posixpath
  3. import zipfile
  4. import itertools
  5. import contextlib
  6. import pathlib
  7. import re
  8. from .glob import translate
  9. __all__ = ['Path']
  10. def _parents(path):
  11. """
  12. Given a path with elements separated by
  13. posixpath.sep, generate all parents of that path.
  14. >>> list(_parents('b/d'))
  15. ['b']
  16. >>> list(_parents('/b/d/'))
  17. ['/b']
  18. >>> list(_parents('b/d/f/'))
  19. ['b/d', 'b']
  20. >>> list(_parents('b'))
  21. []
  22. >>> list(_parents(''))
  23. []
  24. """
  25. return itertools.islice(_ancestry(path), 1, None)
  26. def _ancestry(path):
  27. """
  28. Given a path with elements separated by
  29. posixpath.sep, generate all elements of that path
  30. >>> list(_ancestry('b/d'))
  31. ['b/d', 'b']
  32. >>> list(_ancestry('/b/d/'))
  33. ['/b/d', '/b']
  34. >>> list(_ancestry('b/d/f/'))
  35. ['b/d/f', 'b/d', 'b']
  36. >>> list(_ancestry('b'))
  37. ['b']
  38. >>> list(_ancestry(''))
  39. []
  40. """
  41. path = path.rstrip(posixpath.sep)
  42. while path and path != posixpath.sep:
  43. yield path
  44. path, tail = posixpath.split(path)
# Alias for dict.fromkeys. Called with a single iterable it returns a dict
# keyed by the iterable's items, so repeated items collapse into one key
# while the first-seen order is kept. Iterating the result yields the
# deduplicated items.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
  47. def _difference(minuend, subtrahend):
  48. """
  49. Return items in minuend not in subtrahend, retaining order
  50. with O(1) lookup.
  51. """
  52. return itertools.filterfalse(set(subtrahend).__contains__, minuend)
  53. class InitializedState:
  54. """
  55. Mix-in to save the initialization state for pickling.
  56. """
  57. def __init__(self, *args, **kwargs):
  58. self.__args = args
  59. self.__kwargs = kwargs
  60. super().__init__(*args, **kwargs)
  61. def __getstate__(self):
  62. return self.__args, self.__kwargs
  63. def __setstate__(self, state):
  64. args, kwargs = state
  65. super().__init__(*args, **kwargs)
  66. class CompleteDirs(InitializedState, zipfile.ZipFile):
  67. """
  68. A ZipFile subclass that ensures that implied directories
  69. are always included in the namelist.
  70. >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt']))
  71. ['foo/', 'foo/bar/']
  72. >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/']))
  73. ['foo/']
  74. """
  75. @staticmethod
  76. def _implied_dirs(names):
  77. parents = itertools.chain.from_iterable(map(_parents, names))
  78. as_dirs = (p + posixpath.sep for p in parents)
  79. return _dedupe(_difference(as_dirs, names))
  80. def namelist(self):
  81. names = super().namelist()
  82. return names + list(self._implied_dirs(names))
  83. def _name_set(self):
  84. return set(self.namelist())
  85. def resolve_dir(self, name):
  86. """
  87. If the name represents a directory, return that name
  88. as a directory (with the trailing slash).
  89. """
  90. names = self._name_set()
  91. dirname = name + '/'
  92. dir_match = name not in names and dirname in names
  93. return dirname if dir_match else name
  94. def getinfo(self, name):
  95. """
  96. Supplement getinfo for implied dirs.
  97. """
  98. try:
  99. return super().getinfo(name)
  100. except KeyError:
  101. if not name.endswith('/') or name not in self._name_set():
  102. raise
  103. return zipfile.ZipInfo(filename=name)
  104. @classmethod
  105. def make(cls, source):
  106. """
  107. Given a source (filename or zipfile), return an
  108. appropriate CompleteDirs subclass.
  109. """
  110. if isinstance(source, CompleteDirs):
  111. return source
  112. if not isinstance(source, zipfile.ZipFile):
  113. return cls(source)
  114. # Only allow for FastLookup when supplied zipfile is read-only
  115. if 'r' not in source.mode:
  116. cls = CompleteDirs
  117. source.__class__ = cls
  118. return source
  119. class FastLookup(CompleteDirs):
  120. """
  121. ZipFile subclass to ensure implicit
  122. dirs exist and are resolved rapidly.
  123. """
  124. def namelist(self):
  125. with contextlib.suppress(AttributeError):
  126. return self.__names
  127. self.__names = super().namelist()
  128. return self.__names
  129. def _name_set(self):
  130. with contextlib.suppress(AttributeError):
  131. return self.__lookup
  132. self.__lookup = super()._name_set()
  133. return self.__lookup
  134. def _extract_text_encoding(encoding=None, *args, **kwargs):
  135. # stacklevel=3 so that the caller of the caller see any warning.
  136. return io.text_encoding(encoding, 3), args, kwargs
  137. class Path:
  138. """
  139. A pathlib-compatible interface for zip files.
  140. Consider a zip file with this structure::
  141. .
  142. ├── a.txt
  143. └── b
  144. ├── c.txt
  145. └── d
  146. └── e.txt
  147. >>> data = io.BytesIO()
  148. >>> zf = ZipFile(data, 'w')
  149. >>> zf.writestr('a.txt', 'content of a')
  150. >>> zf.writestr('b/c.txt', 'content of c')
  151. >>> zf.writestr('b/d/e.txt', 'content of e')
  152. >>> zf.filename = 'mem/abcde.zip'
  153. Path accepts the zipfile object itself or a filename
  154. >>> root = Path(zf)
  155. From there, several path operations are available.
  156. Directory iteration (including the zip file itself):
  157. >>> a, b = root.iterdir()
  158. >>> a
  159. Path('mem/abcde.zip', 'a.txt')
  160. >>> b
  161. Path('mem/abcde.zip', 'b/')
  162. name property:
  163. >>> b.name
  164. 'b'
  165. join with divide operator:
  166. >>> c = b / 'c.txt'
  167. >>> c
  168. Path('mem/abcde.zip', 'b/c.txt')
  169. >>> c.name
  170. 'c.txt'
  171. Read text:
  172. >>> c.read_text(encoding='utf-8')
  173. 'content of c'
  174. existence:
  175. >>> c.exists()
  176. True
  177. >>> (b / 'missing.txt').exists()
  178. False
  179. Coercion to string:
  180. >>> import os
  181. >>> str(c).replace(os.sep, posixpath.sep)
  182. 'mem/abcde.zip/b/c.txt'
  183. At the root, ``name``, ``filename``, and ``parent``
  184. resolve to the zipfile. Note these attributes are not
  185. valid and will raise a ``ValueError`` if the zipfile
  186. has no filename.
  187. >>> root.name
  188. 'abcde.zip'
  189. >>> str(root.filename).replace(os.sep, posixpath.sep)
  190. 'mem/abcde.zip'
  191. >>> str(root.parent)
  192. 'mem'
  193. """
  194. __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
  195. def __init__(self, root, at=""):
  196. """
  197. Construct a Path from a ZipFile or filename.
  198. Note: When the source is an existing ZipFile object,
  199. its type (__class__) will be mutated to a
  200. specialized type. If the caller wishes to retain the
  201. original type, the caller should either create a
  202. separate ZipFile object or pass a filename.
  203. """
  204. self.root = FastLookup.make(root)
  205. self.at = at
  206. def __eq__(self, other):
  207. """
  208. >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
  209. False
  210. """
  211. if self.__class__ is not other.__class__:
  212. return NotImplemented
  213. return (self.root, self.at) == (other.root, other.at)
  214. def __hash__(self):
  215. return hash((self.root, self.at))
  216. def open(self, mode='r', *args, pwd=None, **kwargs):
  217. """
  218. Open this entry as text or binary following the semantics
  219. of ``pathlib.Path.open()`` by passing arguments through
  220. to io.TextIOWrapper().
  221. """
  222. if self.is_dir():
  223. raise IsADirectoryError(self)
  224. zip_mode = mode[0]
  225. if not self.exists() and zip_mode == 'r':
  226. raise FileNotFoundError(self)
  227. stream = self.root.open(self.at, zip_mode, pwd=pwd)
  228. if 'b' in mode:
  229. if args or kwargs:
  230. raise ValueError("encoding args invalid for binary operation")
  231. return stream
  232. # Text mode:
  233. encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
  234. return io.TextIOWrapper(stream, encoding, *args, **kwargs)
  235. def _base(self):
  236. return pathlib.PurePosixPath(self.at or self.root.filename)
  237. @property
  238. def name(self):
  239. return self._base().name
  240. @property
  241. def suffix(self):
  242. return self._base().suffix
  243. @property
  244. def suffixes(self):
  245. return self._base().suffixes
  246. @property
  247. def stem(self):
  248. return self._base().stem
  249. @property
  250. def filename(self):
  251. return pathlib.Path(self.root.filename).joinpath(self.at)
  252. def read_text(self, *args, **kwargs):
  253. encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
  254. with self.open('r', encoding, *args, **kwargs) as strm:
  255. return strm.read()
  256. def read_bytes(self):
  257. with self.open('rb') as strm:
  258. return strm.read()
  259. def _is_child(self, path):
  260. return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
  261. def _next(self, at):
  262. return self.__class__(self.root, at)
  263. def is_dir(self):
  264. return not self.at or self.at.endswith("/")
  265. def is_file(self):
  266. return self.exists() and not self.is_dir()
  267. def exists(self):
  268. return self.at in self.root._name_set()
  269. def iterdir(self):
  270. if not self.is_dir():
  271. raise ValueError("Can't listdir a file")
  272. subs = map(self._next, self.root.namelist())
  273. return filter(self._is_child, subs)
  274. def match(self, path_pattern):
  275. return pathlib.PurePosixPath(self.at).match(path_pattern)
  276. def is_symlink(self):
  277. """
  278. Return whether this path is a symlink. Always false (python/cpython#82102).
  279. """
  280. return False
  281. def glob(self, pattern):
  282. if not pattern:
  283. raise ValueError(f"Unacceptable pattern: {pattern!r}")
  284. prefix = re.escape(self.at)
  285. matches = re.compile(prefix + translate(pattern)).fullmatch
  286. return map(self._next, filter(matches, self.root.namelist()))
  287. def rglob(self, pattern):
  288. return self.glob(f'**/{pattern}')
  289. def relative_to(self, other, *extra):
  290. return posixpath.relpath(str(self), str(other.joinpath(*extra)))
  291. def __str__(self):
  292. return posixpath.join(self.root.filename, self.at)
  293. def __repr__(self):
  294. return self.__repr.format(self=self)
  295. def joinpath(self, *other):
  296. next = posixpath.join(self.at, *other)
  297. return self._next(self.root.resolve_dir(next))
  298. __truediv__ = joinpath
  299. @property
  300. def parent(self):
  301. if not self.at:
  302. return self.filename.parent
  303. parent_at = posixpath.dirname(self.at.rstrip('/'))
  304. if parent_at:
  305. parent_at += '/'
  306. return self._next(parent_at)