more.py 72 KB


  1. from __future__ import print_function
  2. from collections import Counter, defaultdict, deque
  3. from functools import partial, wraps
  4. from heapq import merge
  5. from itertools import (
  6. chain,
  7. compress,
  8. count,
  9. cycle,
  10. dropwhile,
  11. groupby,
  12. islice,
  13. repeat,
  14. starmap,
  15. takewhile,
  16. tee
  17. )
  18. from operator import itemgetter, lt, gt, sub
  19. from sys import maxsize, version_info
  20. try:
  21. from collections.abc import Sequence
  22. except ImportError:
  23. from collections import Sequence
  24. from six import binary_type, string_types, text_type
  25. from six.moves import filter, map, range, zip, zip_longest
  26. from .recipes import consume, flatten, take
# Names exported by ``from <this module> import *``.
__all__ = [
    'adjacent',
    'always_iterable',
    'always_reversible',
    'bucket',
    'chunked',
    'circular_shifts',
    'collapse',
    'collate',
    'consecutive_groups',
    'consumer',
    'count_cycle',
    'difference',
    'distinct_permutations',
    'distribute',
    'divide',
    'exactly_n',
    'first',
    'groupby_transform',
    'ilen',
    'interleave_longest',
    'interleave',
    'intersperse',
    'islice_extended',
    'iterate',
    'last',
    'locate',
    'lstrip',
    'make_decorator',
    'map_reduce',
    'numeric_range',
    'one',
    'padded',
    'peekable',
    'replace',
    'rlocate',
    'rstrip',
    'run_length',
    'seekable',
    'SequenceView',
    'side_effect',
    'sliced',
    'sort_together',
    'split_at',
    'split_after',
    'split_before',
    'split_into',
    'spy',
    'stagger',
    'strip',
    'substrings',
    'unique_to_each',
    'unzip',
    'windowed',
    'with_iter',
    'zip_offset',
]

# Unique sentinel object. It is used as the "no value supplied" default of
# several functions below (so that ``None`` remains a legitimate argument)
# and as a fill value that can never collide with real data.
_marker = object()
  85. def chunked(iterable, n):
  86. """Break *iterable* into lists of length *n*:
  87. >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
  88. [[1, 2, 3], [4, 5, 6]]
  89. If the length of *iterable* is not evenly divisible by *n*, the last
  90. returned list will be shorter:
  91. >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
  92. [[1, 2, 3], [4, 5, 6], [7, 8]]
  93. To use a fill-in value instead, see the :func:`grouper` recipe.
  94. :func:`chunked` is useful for splitting up a computation on a large number
  95. of keys into batches, to be pickled and sent off to worker processes. One
  96. example is operations on rows in MySQL, which does not implement
  97. server-side cursors properly and would otherwise load the entire dataset
  98. into RAM on the client.
  99. """
  100. return iter(partial(take, n, iter(iterable)), [])
  101. def first(iterable, default=_marker):
  102. """Return the first item of *iterable*, or *default* if *iterable* is
  103. empty.
  104. >>> first([0, 1, 2, 3])
  105. 0
  106. >>> first([], 'some default')
  107. 'some default'
  108. If *default* is not provided and there are no items in the iterable,
  109. raise ``ValueError``.
  110. :func:`first` is useful when you have a generator of expensive-to-retrieve
  111. values and want any arbitrary one. It is marginally shorter than
  112. ``next(iter(iterable), default)``.
  113. """
  114. try:
  115. return next(iter(iterable))
  116. except StopIteration:
  117. # I'm on the edge about raising ValueError instead of StopIteration. At
  118. # the moment, ValueError wins, because the caller could conceivably
  119. # want to do something different with flow control when I raise the
  120. # exception, and it's weird to explicitly catch StopIteration.
  121. if default is _marker:
  122. raise ValueError('first() was called on an empty iterable, and no '
  123. 'default value was provided.')
  124. return default
  125. def last(iterable, default=_marker):
  126. """Return the last item of *iterable*, or *default* if *iterable* is
  127. empty.
  128. >>> last([0, 1, 2, 3])
  129. 3
  130. >>> last([], 'some default')
  131. 'some default'
  132. If *default* is not provided and there are no items in the iterable,
  133. raise ``ValueError``.
  134. """
  135. try:
  136. try:
  137. # Try to access the last item directly
  138. return iterable[-1]
  139. except (TypeError, AttributeError, KeyError):
  140. # If not slice-able, iterate entirely using length-1 deque
  141. return deque(iterable, maxlen=1)[0]
  142. except IndexError: # If the iterable was empty
  143. if default is _marker:
  144. raise ValueError('last() was called on an empty iterable, and no '
  145. 'default value was provided.')
  146. return default
  147. class peekable(object):
  148. """Wrap an iterator to allow lookahead and prepending elements.
  149. Call :meth:`peek` on the result to get the value that will be returned
  150. by :func:`next`. This won't advance the iterator:
  151. >>> p = peekable(['a', 'b'])
  152. >>> p.peek()
  153. 'a'
  154. >>> next(p)
  155. 'a'
  156. Pass :meth:`peek` a default value to return that instead of raising
  157. ``StopIteration`` when the iterator is exhausted.
  158. >>> p = peekable([])
  159. >>> p.peek('hi')
  160. 'hi'
  161. peekables also offer a :meth:`prepend` method, which "inserts" items
  162. at the head of the iterable:
  163. >>> p = peekable([1, 2, 3])
  164. >>> p.prepend(10, 11, 12)
  165. >>> next(p)
  166. 10
  167. >>> p.peek()
  168. 11
  169. >>> list(p)
  170. [11, 12, 1, 2, 3]
  171. peekables can be indexed. Index 0 is the item that will be returned by
  172. :func:`next`, index 1 is the item after that, and so on:
  173. The values up to the given index will be cached.
  174. >>> p = peekable(['a', 'b', 'c', 'd'])
  175. >>> p[0]
  176. 'a'
  177. >>> p[1]
  178. 'b'
  179. >>> next(p)
  180. 'a'
  181. Negative indexes are supported, but be aware that they will cache the
  182. remaining items in the source iterator, which may require significant
  183. storage.
  184. To check whether a peekable is exhausted, check its truth value:
  185. >>> p = peekable(['a', 'b'])
  186. >>> if p: # peekable has items
  187. ... list(p)
  188. ['a', 'b']
  189. >>> if not p: # peekable is exhaused
  190. ... list(p)
  191. []
  192. """
  193. def __init__(self, iterable):
  194. self._it = iter(iterable)
  195. self._cache = deque()
  196. def __iter__(self):
  197. return self
  198. def __bool__(self):
  199. try:
  200. self.peek()
  201. except StopIteration:
  202. return False
  203. return True
  204. def __nonzero__(self):
  205. # For Python 2 compatibility
  206. return self.__bool__()
  207. def peek(self, default=_marker):
  208. """Return the item that will be next returned from ``next()``.
  209. Return ``default`` if there are no items left. If ``default`` is not
  210. provided, raise ``StopIteration``.
  211. """
  212. if not self._cache:
  213. try:
  214. self._cache.append(next(self._it))
  215. except StopIteration:
  216. if default is _marker:
  217. raise
  218. return default
  219. return self._cache[0]
  220. def prepend(self, *items):
  221. """Stack up items to be the next ones returned from ``next()`` or
  222. ``self.peek()``. The items will be returned in
  223. first in, first out order::
  224. >>> p = peekable([1, 2, 3])
  225. >>> p.prepend(10, 11, 12)
  226. >>> next(p)
  227. 10
  228. >>> list(p)
  229. [11, 12, 1, 2, 3]
  230. It is possible, by prepending items, to "resurrect" a peekable that
  231. previously raised ``StopIteration``.
  232. >>> p = peekable([])
  233. >>> next(p)
  234. Traceback (most recent call last):
  235. ...
  236. StopIteration
  237. >>> p.prepend(1)
  238. >>> next(p)
  239. 1
  240. >>> next(p)
  241. Traceback (most recent call last):
  242. ...
  243. StopIteration
  244. """
  245. self._cache.extendleft(reversed(items))
  246. def __next__(self):
  247. if self._cache:
  248. return self._cache.popleft()
  249. return next(self._it)
  250. next = __next__ # For Python 2 compatibility
  251. def _get_slice(self, index):
  252. # Normalize the slice's arguments
  253. step = 1 if (index.step is None) else index.step
  254. if step > 0:
  255. start = 0 if (index.start is None) else index.start
  256. stop = maxsize if (index.stop is None) else index.stop
  257. elif step < 0:
  258. start = -1 if (index.start is None) else index.start
  259. stop = (-maxsize - 1) if (index.stop is None) else index.stop
  260. else:
  261. raise ValueError('slice step cannot be zero')
  262. # If either the start or stop index is negative, we'll need to cache
  263. # the rest of the iterable in order to slice from the right side.
  264. if (start < 0) or (stop < 0):
  265. self._cache.extend(self._it)
  266. # Otherwise we'll need to find the rightmost index and cache to that
  267. # point.
  268. else:
  269. n = min(max(start, stop) + 1, maxsize)
  270. cache_len = len(self._cache)
  271. if n >= cache_len:
  272. self._cache.extend(islice(self._it, n - cache_len))
  273. return list(self._cache)[index]
  274. def __getitem__(self, index):
  275. if isinstance(index, slice):
  276. return self._get_slice(index)
  277. cache_len = len(self._cache)
  278. if index < 0:
  279. self._cache.extend(self._it)
  280. elif index >= cache_len:
  281. self._cache.extend(islice(self._it, index + 1 - cache_len))
  282. return self._cache[index]
  283. def _collate(*iterables, **kwargs):
  284. """Helper for ``collate()``, called when the user is using the ``reverse``
  285. or ``key`` keyword arguments on Python versions below 3.5.
  286. """
  287. key = kwargs.pop('key', lambda a: a)
  288. reverse = kwargs.pop('reverse', False)
  289. min_or_max = partial(max if reverse else min, key=itemgetter(0))
  290. peekables = [peekable(it) for it in iterables]
  291. peekables = [p for p in peekables if p] # Kill empties.
  292. while peekables:
  293. _, p = min_or_max((key(p.peek()), p) for p in peekables)
  294. yield next(p)
  295. peekables = [x for x in peekables if x]
  296. def collate(*iterables, **kwargs):
  297. """Return a sorted merge of the items from each of several already-sorted
  298. *iterables*.
  299. >>> list(collate('ACDZ', 'AZ', 'JKL'))
  300. ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z']
  301. Works lazily, keeping only the next value from each iterable in memory. Use
  302. :func:`collate` to, for example, perform a n-way mergesort of items that
  303. don't fit in memory.
  304. If a *key* function is specified, the iterables will be sorted according
  305. to its result:
  306. >>> key = lambda s: int(s) # Sort by numeric value, not by string
  307. >>> list(collate(['1', '10'], ['2', '11'], key=key))
  308. ['1', '2', '10', '11']
  309. If the *iterables* are sorted in descending order, set *reverse* to
  310. ``True``:
  311. >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True))
  312. [5, 4, 3, 2, 1, 0]
  313. If the elements of the passed-in iterables are out of order, you might get
  314. unexpected results.
  315. On Python 2.7, this function delegates to :func:`heapq.merge` if neither
  316. of the keyword arguments are specified. On Python 3.5+, this function
  317. is an alias for :func:`heapq.merge`.
  318. """
  319. if not kwargs:
  320. return merge(*iterables)
  321. return _collate(*iterables, **kwargs)
  322. # If using Python version 3.5 or greater, heapq.merge() will be faster than
  323. # collate - use that instead.
  324. if version_info >= (3, 5, 0):
  325. _collate_docstring = collate.__doc__
  326. collate = partial(merge)
  327. collate.__doc__ = _collate_docstring
  328. def consumer(func):
  329. """Decorator that automatically advances a PEP-342-style "reverse iterator"
  330. to its first yield point so you don't have to call ``next()`` on it
  331. manually.
  332. >>> @consumer
  333. ... def tally():
  334. ... i = 0
  335. ... while True:
  336. ... print('Thing number %s is %s.' % (i, (yield)))
  337. ... i += 1
  338. ...
  339. >>> t = tally()
  340. >>> t.send('red')
  341. Thing number 0 is red.
  342. >>> t.send('fish')
  343. Thing number 1 is fish.
  344. Without the decorator, you would have to call ``next(t)`` before
  345. ``t.send()`` could be used.
  346. """
  347. @wraps(func)
  348. def wrapper(*args, **kwargs):
  349. gen = func(*args, **kwargs)
  350. next(gen)
  351. return gen
  352. return wrapper
  353. def ilen(iterable):
  354. """Return the number of items in *iterable*.
  355. >>> ilen(x for x in range(1000000) if x % 3 == 0)
  356. 333334
  357. This consumes the iterable, so handle with care.
  358. """
  359. # This approach was selected because benchmarks showed it's likely the
  360. # fastest of the known implementations at the time of writing.
  361. # See GitHub tracker: #236, #230.
  362. counter = count()
  363. deque(zip(iterable, counter), maxlen=0)
  364. return next(counter)
  365. def iterate(func, start):
  366. """Return ``start``, ``func(start)``, ``func(func(start))``, ...
  367. >>> from itertools import islice
  368. >>> list(islice(iterate(lambda x: 2*x, 1), 10))
  369. [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
  370. """
  371. while True:
  372. yield start
  373. start = func(start)
  374. def with_iter(context_manager):
  375. """Wrap an iterable in a ``with`` statement, so it closes once exhausted.
  376. For example, this will close the file when the iterator is exhausted::
  377. upper_lines = (line.upper() for line in with_iter(open('foo')))
  378. Any context manager which returns an iterable is a candidate for
  379. ``with_iter``.
  380. """
  381. with context_manager as iterable:
  382. for item in iterable:
  383. yield item
  384. def one(iterable, too_short=None, too_long=None):
  385. """Return the first item from *iterable*, which is expected to contain only
  386. that item. Raise an exception if *iterable* is empty or has more than one
  387. item.
  388. :func:`one` is useful for ensuring that an iterable contains only one item.
  389. For example, it can be used to retrieve the result of a database query
  390. that is expected to return a single row.
  391. If *iterable* is empty, ``ValueError`` will be raised. You may specify a
  392. different exception with the *too_short* keyword:
  393. >>> it = []
  394. >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL
  395. Traceback (most recent call last):
  396. ...
  397. ValueError: too many items in iterable (expected 1)'
  398. >>> too_short = IndexError('too few items')
  399. >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL
  400. Traceback (most recent call last):
  401. ...
  402. IndexError: too few items
  403. Similarly, if *iterable* contains more than one item, ``ValueError`` will
  404. be raised. You may specify a different exception with the *too_long*
  405. keyword:
  406. >>> it = ['too', 'many']
  407. >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL
  408. Traceback (most recent call last):
  409. ...
  410. ValueError: too many items in iterable (expected 1)'
  411. >>> too_long = RuntimeError
  412. >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL
  413. Traceback (most recent call last):
  414. ...
  415. RuntimeError
  416. Note that :func:`one` attempts to advance *iterable* twice to ensure there
  417. is only one item. If there is more than one, both items will be discarded.
  418. See :func:`spy` or :func:`peekable` to check iterable contents less
  419. destructively.
  420. """
  421. it = iter(iterable)
  422. try:
  423. value = next(it)
  424. except StopIteration:
  425. raise too_short or ValueError('too few items in iterable (expected 1)')
  426. try:
  427. next(it)
  428. except StopIteration:
  429. pass
  430. else:
  431. raise too_long or ValueError('too many items in iterable (expected 1)')
  432. return value
  433. def distinct_permutations(iterable):
  434. """Yield successive distinct permutations of the elements in *iterable*.
  435. >>> sorted(distinct_permutations([1, 0, 1]))
  436. [(0, 1, 1), (1, 0, 1), (1, 1, 0)]
  437. Equivalent to ``set(permutations(iterable))``, except duplicates are not
  438. generated and thrown away. For larger input sequences this is much more
  439. efficient.
  440. Duplicate permutations arise when there are duplicated elements in the
  441. input iterable. The number of items returned is
  442. `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
  443. items input, and each `x_i` is the count of a distinct item in the input
  444. sequence.
  445. """
  446. def perm_unique_helper(item_counts, perm, i):
  447. """Internal helper function
  448. :arg item_counts: Stores the unique items in ``iterable`` and how many
  449. times they are repeated
  450. :arg perm: The permutation that is being built for output
  451. :arg i: The index of the permutation being modified
  452. The output permutations are built up recursively; the distinct items
  453. are placed until their repetitions are exhausted.
  454. """
  455. if i < 0:
  456. yield tuple(perm)
  457. else:
  458. for item in item_counts:
  459. if item_counts[item] <= 0:
  460. continue
  461. perm[i] = item
  462. item_counts[item] -= 1
  463. for x in perm_unique_helper(item_counts, perm, i - 1):
  464. yield x
  465. item_counts[item] += 1
  466. item_counts = Counter(iterable)
  467. length = sum(item_counts.values())
  468. return perm_unique_helper(item_counts, [None] * length, length - 1)
  469. def intersperse(e, iterable, n=1):
  470. """Intersperse filler element *e* among the items in *iterable*, leaving
  471. *n* items between each filler element.
  472. >>> list(intersperse('!', [1, 2, 3, 4, 5]))
  473. [1, '!', 2, '!', 3, '!', 4, '!', 5]
  474. >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
  475. [1, 2, None, 3, 4, None, 5]
  476. """
  477. if n == 0:
  478. raise ValueError('n must be > 0')
  479. elif n == 1:
  480. # interleave(repeat(e), iterable) -> e, x_0, e, e, x_1, e, x_2...
  481. # islice(..., 1, None) -> x_0, e, e, x_1, e, x_2...
  482. return islice(interleave(repeat(e), iterable), 1, None)
  483. else:
  484. # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]...
  485. # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]...
  486. # flatten(...) -> x_0, x_1, e, x_2, x_3...
  487. filler = repeat([e])
  488. chunks = chunked(iterable, n)
  489. return flatten(islice(interleave(filler, chunks), 1, None))
  490. def unique_to_each(*iterables):
  491. """Return the elements from each of the input iterables that aren't in the
  492. other input iterables.
  493. For example, suppose you have a set of packages, each with a set of
  494. dependencies::
  495. {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}
  496. If you remove one package, which dependencies can also be removed?
  497. If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
  498. associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for
  499. ``pkg_2``, and ``D`` is only needed for ``pkg_3``::
  500. >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
  501. [['A'], ['C'], ['D']]
  502. If there are duplicates in one input iterable that aren't in the others
  503. they will be duplicated in the output. Input order is preserved::
  504. >>> unique_to_each("mississippi", "missouri")
  505. [['p', 'p'], ['o', 'u', 'r']]
  506. It is assumed that the elements of each iterable are hashable.
  507. """
  508. pool = [list(it) for it in iterables]
  509. counts = Counter(chain.from_iterable(map(set, pool)))
  510. uniques = {element for element in counts if counts[element] == 1}
  511. return [list(filter(uniques.__contains__, it)) for it in pool]
  512. def windowed(seq, n, fillvalue=None, step=1):
  513. """Return a sliding window of width *n* over the given iterable.
  514. >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
  515. >>> list(all_windows)
  516. [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
  517. When the window is larger than the iterable, *fillvalue* is used in place
  518. of missing values::
  519. >>> list(windowed([1, 2, 3], 4))
  520. [(1, 2, 3, None)]
  521. Each window will advance in increments of *step*:
  522. >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
  523. [(1, 2, 3), (3, 4, 5), (5, 6, '!')]
  524. """
  525. if n < 0:
  526. raise ValueError('n must be >= 0')
  527. if n == 0:
  528. yield tuple()
  529. return
  530. if step < 1:
  531. raise ValueError('step must be >= 1')
  532. it = iter(seq)
  533. window = deque([], n)
  534. append = window.append
  535. # Initial deque fill
  536. for _ in range(n):
  537. append(next(it, fillvalue))
  538. yield tuple(window)
  539. # Appending new items to the right causes old items to fall off the left
  540. i = 0
  541. for item in it:
  542. append(item)
  543. i = (i + 1) % step
  544. if i % step == 0:
  545. yield tuple(window)
  546. # If there are items from the iterable in the window, pad with the given
  547. # value and emit them.
  548. if (i % step) and (step - i < n):
  549. for _ in range(step - i):
  550. append(fillvalue)
  551. yield tuple(window)
  552. def substrings(iterable, join_func=None):
  553. """Yield all of the substrings of *iterable*.
  554. >>> [''.join(s) for s in substrings('more')]
  555. ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']
  556. Note that non-string iterables can also be subdivided.
  557. >>> list(substrings([0, 1, 2]))
  558. [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]
  559. """
  560. # The length-1 substrings
  561. seq = []
  562. for item in iter(iterable):
  563. seq.append(item)
  564. yield (item,)
  565. seq = tuple(seq)
  566. item_count = len(seq)
  567. # And the rest
  568. for n in range(2, item_count + 1):
  569. for i in range(item_count - n + 1):
  570. yield seq[i:i + n]
  571. class bucket(object):
  572. """Wrap *iterable* and return an object that buckets it iterable into
  573. child iterables based on a *key* function.
  574. >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
  575. >>> s = bucket(iterable, key=lambda x: x[0])
  576. >>> a_iterable = s['a']
  577. >>> next(a_iterable)
  578. 'a1'
  579. >>> next(a_iterable)
  580. 'a2'
  581. >>> list(s['b'])
  582. ['b1', 'b2', 'b3']
  583. The original iterable will be advanced and its items will be cached until
  584. they are used by the child iterables. This may require significant storage.
  585. By default, attempting to select a bucket to which no items belong will
  586. exhaust the iterable and cache all values.
  587. If you specify a *validator* function, selected buckets will instead be
  588. checked against it.
  589. >>> from itertools import count
  590. >>> it = count(1, 2) # Infinite sequence of odd numbers
  591. >>> key = lambda x: x % 10 # Bucket by last digit
  592. >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only
  593. >>> s = bucket(it, key=key, validator=validator)
  594. >>> 2 in s
  595. False
  596. >>> list(s[2])
  597. []
  598. """
  599. def __init__(self, iterable, key, validator=None):
  600. self._it = iter(iterable)
  601. self._key = key
  602. self._cache = defaultdict(deque)
  603. self._validator = validator or (lambda x: True)
  604. def __contains__(self, value):
  605. if not self._validator(value):
  606. return False
  607. try:
  608. item = next(self[value])
  609. except StopIteration:
  610. return False
  611. else:
  612. self._cache[value].appendleft(item)
  613. return True
  614. def _get_values(self, value):
  615. """
  616. Helper to yield items from the parent iterator that match *value*.
  617. Items that don't match are stored in the local cache as they
  618. are encountered.
  619. """
  620. while True:
  621. # If we've cached some items that match the target value, emit
  622. # the first one and evict it from the cache.
  623. if self._cache[value]:
  624. yield self._cache[value].popleft()
  625. # Otherwise we need to advance the parent iterator to search for
  626. # a matching item, caching the rest.
  627. else:
  628. while True:
  629. try:
  630. item = next(self._it)
  631. except StopIteration:
  632. return
  633. item_value = self._key(item)
  634. if item_value == value:
  635. yield item
  636. break
  637. elif self._validator(item_value):
  638. self._cache[item_value].append(item)
  639. def __getitem__(self, value):
  640. if not self._validator(value):
  641. return iter(())
  642. return self._get_values(value)
  643. def spy(iterable, n=1):
  644. """Return a 2-tuple with a list containing the first *n* elements of
  645. *iterable*, and an iterator with the same items as *iterable*.
  646. This allows you to "look ahead" at the items in the iterable without
  647. advancing it.
  648. There is one item in the list by default:
  649. >>> iterable = 'abcdefg'
  650. >>> head, iterable = spy(iterable)
  651. >>> head
  652. ['a']
  653. >>> list(iterable)
  654. ['a', 'b', 'c', 'd', 'e', 'f', 'g']
  655. You may use unpacking to retrieve items instead of lists:
  656. >>> (head,), iterable = spy('abcdefg')
  657. >>> head
  658. 'a'
  659. >>> (first, second), iterable = spy('abcdefg', 2)
  660. >>> first
  661. 'a'
  662. >>> second
  663. 'b'
  664. The number of items requested can be larger than the number of items in
  665. the iterable:
  666. >>> iterable = [1, 2, 3, 4, 5]
  667. >>> head, iterable = spy(iterable, 10)
  668. >>> head
  669. [1, 2, 3, 4, 5]
  670. >>> list(iterable)
  671. [1, 2, 3, 4, 5]
  672. """
  673. it = iter(iterable)
  674. head = take(n, it)
  675. return head, chain(head, it)
  676. def interleave(*iterables):
  677. """Return a new iterable yielding from each iterable in turn,
  678. until the shortest is exhausted.
  679. >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
  680. [1, 4, 6, 2, 5, 7]
  681. For a version that doesn't terminate after the shortest iterable is
  682. exhausted, see :func:`interleave_longest`.
  683. """
  684. return chain.from_iterable(zip(*iterables))
  685. def interleave_longest(*iterables):
  686. """Return a new iterable yielding from each iterable in turn,
  687. skipping any that are exhausted.
  688. >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
  689. [1, 4, 6, 2, 5, 7, 3, 8]
  690. This function produces the same output as :func:`roundrobin`, but may
  691. perform better for some inputs (in particular when the number of iterables
  692. is large).
  693. """
  694. i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker))
  695. return (x for x in i if x is not _marker)
  696. def collapse(iterable, base_type=None, levels=None):
  697. """Flatten an iterable with multiple levels of nesting (e.g., a list of
  698. lists of tuples) into non-iterable types.
  699. >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
  700. >>> list(collapse(iterable))
  701. [1, 2, 3, 4, 5, 6]
  702. String types are not considered iterable and will not be collapsed.
  703. To avoid collapsing other types, specify *base_type*:
  704. >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
  705. >>> list(collapse(iterable, base_type=tuple))
  706. ['ab', ('cd', 'ef'), 'gh', 'ij']
  707. Specify *levels* to stop flattening after a certain level:
  708. >>> iterable = [('a', ['b']), ('c', ['d'])]
  709. >>> list(collapse(iterable)) # Fully flattened
  710. ['a', 'b', 'c', 'd']
  711. >>> list(collapse(iterable, levels=1)) # Only one level flattened
  712. ['a', ['b'], 'c', ['d']]
  713. """
  714. def walk(node, level):
  715. if (
  716. ((levels is not None) and (level > levels)) or
  717. isinstance(node, string_types) or
  718. ((base_type is not None) and isinstance(node, base_type))
  719. ):
  720. yield node
  721. return
  722. try:
  723. tree = iter(node)
  724. except TypeError:
  725. yield node
  726. return
  727. else:
  728. for child in tree:
  729. for x in walk(child, level + 1):
  730. yield x
  731. for x in walk(iterable, 0):
  732. yield x
  733. def side_effect(func, iterable, chunk_size=None, before=None, after=None):
  734. """Invoke *func* on each item in *iterable* (or on each *chunk_size* group
  735. of items) before yielding the item.
  736. `func` must be a function that takes a single argument. Its return value
  737. will be discarded.
  738. *before* and *after* are optional functions that take no arguments. They
  739. will be executed before iteration starts and after it ends, respectively.
  740. `side_effect` can be used for logging, updating progress bars, or anything
  741. that is not functionally "pure."
  742. Emitting a status message:
  743. >>> from more_itertools import consume
  744. >>> func = lambda item: print('Received {}'.format(item))
  745. >>> consume(side_effect(func, range(2)))
  746. Received 0
  747. Received 1
  748. Operating on chunks of items:
  749. >>> pair_sums = []
  750. >>> func = lambda chunk: pair_sums.append(sum(chunk))
  751. >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
  752. [0, 1, 2, 3, 4, 5]
  753. >>> list(pair_sums)
  754. [1, 5, 9]
  755. Writing to a file-like object:
  756. >>> from io import StringIO
  757. >>> from more_itertools import consume
  758. >>> f = StringIO()
  759. >>> func = lambda x: print(x, file=f)
  760. >>> before = lambda: print(u'HEADER', file=f)
  761. >>> after = f.close
  762. >>> it = [u'a', u'b', u'c']
  763. >>> consume(side_effect(func, it, before=before, after=after))
  764. >>> f.closed
  765. True
  766. """
  767. try:
  768. if before is not None:
  769. before()
  770. if chunk_size is None:
  771. for item in iterable:
  772. func(item)
  773. yield item
  774. else:
  775. for chunk in chunked(iterable, chunk_size):
  776. func(chunk)
  777. for item in chunk:
  778. yield item
  779. finally:
  780. if after is not None:
  781. after()
  782. def sliced(seq, n):
  783. """Yield slices of length *n* from the sequence *seq*.
  784. >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
  785. [(1, 2, 3), (4, 5, 6)]
  786. If the length of the sequence is not divisible by the requested slice
  787. length, the last slice will be shorter.
  788. >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
  789. [(1, 2, 3), (4, 5, 6), (7, 8)]
  790. This function will only work for iterables that support slicing.
  791. For non-sliceable iterables, see :func:`chunked`.
  792. """
  793. return takewhile(bool, (seq[i: i + n] for i in count(0, n)))
  794. def split_at(iterable, pred):
  795. """Yield lists of items from *iterable*, where each list is delimited by
  796. an item where callable *pred* returns ``True``. The lists do not include
  797. the delimiting items.
  798. >>> list(split_at('abcdcba', lambda x: x == 'b'))
  799. [['a'], ['c', 'd', 'c'], ['a']]
  800. >>> list(split_at(range(10), lambda n: n % 2 == 1))
  801. [[0], [2], [4], [6], [8], []]
  802. """
  803. buf = []
  804. for item in iterable:
  805. if pred(item):
  806. yield buf
  807. buf = []
  808. else:
  809. buf.append(item)
  810. yield buf
  811. def split_before(iterable, pred):
  812. """Yield lists of items from *iterable*, where each list starts with an
  813. item where callable *pred* returns ``True``:
  814. >>> list(split_before('OneTwo', lambda s: s.isupper()))
  815. [['O', 'n', 'e'], ['T', 'w', 'o']]
  816. >>> list(split_before(range(10), lambda n: n % 3 == 0))
  817. [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
  818. """
  819. buf = []
  820. for item in iterable:
  821. if pred(item) and buf:
  822. yield buf
  823. buf = []
  824. buf.append(item)
  825. yield buf
  826. def split_after(iterable, pred):
  827. """Yield lists of items from *iterable*, where each list ends with an
  828. item where callable *pred* returns ``True``:
  829. >>> list(split_after('one1two2', lambda s: s.isdigit()))
  830. [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
  831. >>> list(split_after(range(10), lambda n: n % 3 == 0))
  832. [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
  833. """
  834. buf = []
  835. for item in iterable:
  836. buf.append(item)
  837. if pred(item) and buf:
  838. yield buf
  839. buf = []
  840. if buf:
  841. yield buf
  842. def split_into(iterable, sizes):
  843. """Yield a list of sequential items from *iterable* of length 'n' for each
  844. integer 'n' in *sizes*.
  845. >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
  846. [[1], [2, 3], [4, 5, 6]]
  847. If the sum of *sizes* is smaller than the length of *iterable*, then the
  848. remaining items of *iterable* will not be returned.
  849. >>> list(split_into([1,2,3,4,5,6], [2,3]))
  850. [[1, 2], [3, 4, 5]]
  851. If the sum of *sizes* is larger than the length of *iterable*, fewer items
  852. will be returned in the iteration that overruns *iterable* and further
  853. lists will be empty:
  854. >>> list(split_into([1,2,3,4], [1,2,3,4]))
  855. [[1], [2, 3], [4], []]
  856. When a ``None`` object is encountered in *sizes*, the returned list will
  857. contain items up to the end of *iterable* the same way that itertools.slice
  858. does:
  859. >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
  860. [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]
  861. :func:`split_into` can be useful for grouping a series of items where the
  862. sizes of the groups are not uniform. An example would be where in a row
  863. from a table, multiple columns represent elements of the same feature
  864. (e.g. a point represented by x,y,z) but, the format is not the same for
  865. all columns.
  866. """
  867. # convert the iterable argument into an iterator so its contents can
  868. # be consumed by islice in case it is a generator
  869. it = iter(iterable)
  870. for size in sizes:
  871. if size is None:
  872. yield list(it)
  873. return
  874. else:
  875. yield list(islice(it, size))
  876. def padded(iterable, fillvalue=None, n=None, next_multiple=False):
  877. """Yield the elements from *iterable*, followed by *fillvalue*, such that
  878. at least *n* items are emitted.
  879. >>> list(padded([1, 2, 3], '?', 5))
  880. [1, 2, 3, '?', '?']
  881. If *next_multiple* is ``True``, *fillvalue* will be emitted until the
  882. number of items emitted is a multiple of *n*::
  883. >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
  884. [1, 2, 3, 4, None, None]
  885. If *n* is ``None``, *fillvalue* will be emitted indefinitely.
  886. """
  887. it = iter(iterable)
  888. if n is None:
  889. for item in chain(it, repeat(fillvalue)):
  890. yield item
  891. elif n < 1:
  892. raise ValueError('n must be at least 1')
  893. else:
  894. item_count = 0
  895. for item in it:
  896. yield item
  897. item_count += 1
  898. remaining = (n - item_count) % n if next_multiple else n - item_count
  899. for _ in range(remaining):
  900. yield fillvalue
  901. def distribute(n, iterable):
  902. """Distribute the items from *iterable* among *n* smaller iterables.
  903. >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
  904. >>> list(group_1)
  905. [1, 3, 5]
  906. >>> list(group_2)
  907. [2, 4, 6]
  908. If the length of *iterable* is not evenly divisible by *n*, then the
  909. length of the returned iterables will not be identical:
  910. >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
  911. >>> [list(c) for c in children]
  912. [[1, 4, 7], [2, 5], [3, 6]]
  913. If the length of *iterable* is smaller than *n*, then the last returned
  914. iterables will be empty:
  915. >>> children = distribute(5, [1, 2, 3])
  916. >>> [list(c) for c in children]
  917. [[1], [2], [3], [], []]
  918. This function uses :func:`itertools.tee` and may require significant
  919. storage. If you need the order items in the smaller iterables to match the
  920. original iterable, see :func:`divide`.
  921. """
  922. if n < 1:
  923. raise ValueError('n must be at least 1')
  924. children = tee(iterable, n)
  925. return [islice(it, index, None, n) for index, it in enumerate(children)]
  926. def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
  927. """Yield tuples whose elements are offset from *iterable*.
  928. The amount by which the `i`-th item in each tuple is offset is given by
  929. the `i`-th item in *offsets*.
  930. >>> list(stagger([0, 1, 2, 3]))
  931. [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
  932. >>> list(stagger(range(8), offsets=(0, 2, 4)))
  933. [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]
  934. By default, the sequence will end when the final element of a tuple is the
  935. last item in the iterable. To continue until the first element of a tuple
  936. is the last item in the iterable, set *longest* to ``True``::
  937. >>> list(stagger([0, 1, 2, 3], longest=True))
  938. [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
  939. By default, ``None`` will be used to replace offsets beyond the end of the
  940. sequence. Specify *fillvalue* to use some other value.
  941. """
  942. children = tee(iterable, len(offsets))
  943. return zip_offset(
  944. *children, offsets=offsets, longest=longest, fillvalue=fillvalue
  945. )
  946. def zip_offset(*iterables, **kwargs):
  947. """``zip`` the input *iterables* together, but offset the `i`-th iterable
  948. by the `i`-th item in *offsets*.
  949. >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
  950. [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]
  951. This can be used as a lightweight alternative to SciPy or pandas to analyze
  952. data sets in which some series have a lead or lag relationship.
  953. By default, the sequence will end when the shortest iterable is exhausted.
  954. To continue until the longest iterable is exhausted, set *longest* to
  955. ``True``.
  956. >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
  957. [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]
  958. By default, ``None`` will be used to replace offsets beyond the end of the
  959. sequence. Specify *fillvalue* to use some other value.
  960. """
  961. offsets = kwargs['offsets']
  962. longest = kwargs.get('longest', False)
  963. fillvalue = kwargs.get('fillvalue', None)
  964. if len(iterables) != len(offsets):
  965. raise ValueError("Number of iterables and offsets didn't match")
  966. staggered = []
  967. for it, n in zip(iterables, offsets):
  968. if n < 0:
  969. staggered.append(chain(repeat(fillvalue, -n), it))
  970. elif n > 0:
  971. staggered.append(islice(it, n, None))
  972. else:
  973. staggered.append(it)
  974. if longest:
  975. return zip_longest(*staggered, fillvalue=fillvalue)
  976. return zip(*staggered)
  977. def sort_together(iterables, key_list=(0,), reverse=False):
  978. """Return the input iterables sorted together, with *key_list* as the
  979. priority for sorting. All iterables are trimmed to the length of the
  980. shortest one.
  981. This can be used like the sorting function in a spreadsheet. If each
  982. iterable represents a column of data, the key list determines which
  983. columns are used for sorting.
  984. By default, all iterables are sorted using the ``0``-th iterable::
  985. >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
  986. >>> sort_together(iterables)
  987. [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]
  988. Set a different key list to sort according to another iterable.
  989. Specifying multiple keys dictates how ties are broken::
  990. >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
  991. >>> sort_together(iterables, key_list=(1, 2))
  992. [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]
  993. Set *reverse* to ``True`` to sort in descending order.
  994. >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
  995. [(3, 2, 1), ('a', 'b', 'c')]
  996. """
  997. return list(zip(*sorted(zip(*iterables),
  998. key=itemgetter(*key_list),
  999. reverse=reverse)))
  1000. def unzip(iterable):
  1001. """The inverse of :func:`zip`, this function disaggregates the elements
  1002. of the zipped *iterable*.
  1003. The ``i``-th iterable contains the ``i``-th element from each element
  1004. of the zipped iterable. The first element is used to to determine the
  1005. length of the remaining elements.
  1006. >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
  1007. >>> letters, numbers = unzip(iterable)
  1008. >>> list(letters)
  1009. ['a', 'b', 'c', 'd']
  1010. >>> list(numbers)
  1011. [1, 2, 3, 4]
  1012. This is similar to using ``zip(*iterable)``, but it avoids reading
  1013. *iterable* into memory. Note, however, that this function uses
  1014. :func:`itertools.tee` and thus may require significant storage.
  1015. """
  1016. head, iterable = spy(iter(iterable))
  1017. if not head:
  1018. # empty iterable, e.g. zip([], [], [])
  1019. return ()
  1020. # spy returns a one-length iterable as head
  1021. head = head[0]
  1022. iterables = tee(iterable, len(head))
  1023. def itemgetter(i):
  1024. def getter(obj):
  1025. try:
  1026. return obj[i]
  1027. except IndexError:
  1028. # basically if we have an iterable like
  1029. # iter([(1, 2, 3), (4, 5), (6,)])
  1030. # the second unzipped iterable would fail at the third tuple
  1031. # since it would try to access tup[1]
  1032. # same with the third unzipped iterable and the second tuple
  1033. # to support these "improperly zipped" iterables,
  1034. # we create a custom itemgetter
  1035. # which just stops the unzipped iterables
  1036. # at first length mismatch
  1037. raise StopIteration
  1038. return getter
  1039. return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables))
  1040. def divide(n, iterable):
  1041. """Divide the elements from *iterable* into *n* parts, maintaining
  1042. order.
  1043. >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
  1044. >>> list(group_1)
  1045. [1, 2, 3]
  1046. >>> list(group_2)
  1047. [4, 5, 6]
  1048. If the length of *iterable* is not evenly divisible by *n*, then the
  1049. length of the returned iterables will not be identical:
  1050. >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7])
  1051. >>> [list(c) for c in children]
  1052. [[1, 2, 3], [4, 5], [6, 7]]
  1053. If the length of the iterable is smaller than n, then the last returned
  1054. iterables will be empty:
  1055. >>> children = divide(5, [1, 2, 3])
  1056. >>> [list(c) for c in children]
  1057. [[1], [2], [3], [], []]
  1058. This function will exhaust the iterable before returning and may require
  1059. significant storage. If order is not important, see :func:`distribute`,
  1060. which does not first pull the iterable into memory.
  1061. """
  1062. if n < 1:
  1063. raise ValueError('n must be at least 1')
  1064. seq = tuple(iterable)
  1065. q, r = divmod(len(seq), n)
  1066. ret = []
  1067. for i in range(n):
  1068. start = (i * q) + (i if i < r else r)
  1069. stop = ((i + 1) * q) + (i + 1 if i + 1 < r else r)
  1070. ret.append(iter(seq[start:stop]))
  1071. return ret
  1072. def always_iterable(obj, base_type=(text_type, binary_type)):
  1073. """If *obj* is iterable, return an iterator over its items::
  1074. >>> obj = (1, 2, 3)
  1075. >>> list(always_iterable(obj))
  1076. [1, 2, 3]
  1077. If *obj* is not iterable, return a one-item iterable containing *obj*::
  1078. >>> obj = 1
  1079. >>> list(always_iterable(obj))
  1080. [1]
  1081. If *obj* is ``None``, return an empty iterable:
  1082. >>> obj = None
  1083. >>> list(always_iterable(None))
  1084. []
  1085. By default, binary and text strings are not considered iterable::
  1086. >>> obj = 'foo'
  1087. >>> list(always_iterable(obj))
  1088. ['foo']
  1089. If *base_type* is set, objects for which ``isinstance(obj, base_type)``
  1090. returns ``True`` won't be considered iterable.
  1091. >>> obj = {'a': 1}
  1092. >>> list(always_iterable(obj)) # Iterate over the dict's keys
  1093. ['a']
  1094. >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit
  1095. [{'a': 1}]
  1096. Set *base_type* to ``None`` to avoid any special handling and treat objects
  1097. Python considers iterable as iterable:
  1098. >>> obj = 'foo'
  1099. >>> list(always_iterable(obj, base_type=None))
  1100. ['f', 'o', 'o']
  1101. """
  1102. if obj is None:
  1103. return iter(())
  1104. if (base_type is not None) and isinstance(obj, base_type):
  1105. return iter((obj,))
  1106. try:
  1107. return iter(obj)
  1108. except TypeError:
  1109. return iter((obj,))
  1110. def adjacent(predicate, iterable, distance=1):
  1111. """Return an iterable over `(bool, item)` tuples where the `item` is
  1112. drawn from *iterable* and the `bool` indicates whether
  1113. that item satisfies the *predicate* or is adjacent to an item that does.
  1114. For example, to find whether items are adjacent to a ``3``::
  1115. >>> list(adjacent(lambda x: x == 3, range(6)))
  1116. [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]
  1117. Set *distance* to change what counts as adjacent. For example, to find
  1118. whether items are two places away from a ``3``:
  1119. >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
  1120. [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]
  1121. This is useful for contextualizing the results of a search function.
  1122. For example, a code comparison tool might want to identify lines that
  1123. have changed, but also surrounding lines to give the viewer of the diff
  1124. context.
  1125. The predicate function will only be called once for each item in the
  1126. iterable.
  1127. See also :func:`groupby_transform`, which can be used with this function
  1128. to group ranges of items with the same `bool` value.
  1129. """
  1130. # Allow distance=0 mainly for testing that it reproduces results with map()
  1131. if distance < 0:
  1132. raise ValueError('distance must be at least 0')
  1133. i1, i2 = tee(iterable)
  1134. padding = [False] * distance
  1135. selected = chain(padding, map(predicate, i1), padding)
  1136. adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1))
  1137. return zip(adjacent_to_selected, i2)
  1138. def groupby_transform(iterable, keyfunc=None, valuefunc=None):
  1139. """An extension of :func:`itertools.groupby` that transforms the values of
  1140. *iterable* after grouping them.
  1141. *keyfunc* is a function used to compute a grouping key for each item.
  1142. *valuefunc* is a function for transforming the items after grouping.
  1143. >>> iterable = 'AaaABbBCcA'
  1144. >>> keyfunc = lambda x: x.upper()
  1145. >>> valuefunc = lambda x: x.lower()
  1146. >>> grouper = groupby_transform(iterable, keyfunc, valuefunc)
  1147. >>> [(k, ''.join(g)) for k, g in grouper]
  1148. [('A', 'aaaa'), ('B', 'bbb'), ('C', 'cc'), ('A', 'a')]
  1149. *keyfunc* and *valuefunc* default to identity functions if they are not
  1150. specified.
  1151. :func:`groupby_transform` is useful when grouping elements of an iterable
  1152. using a separate iterable as the key. To do this, :func:`zip` the iterables
  1153. and pass a *keyfunc* that extracts the first element and a *valuefunc*
  1154. that extracts the second element::
  1155. >>> from operator import itemgetter
  1156. >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
  1157. >>> values = 'abcdefghi'
  1158. >>> iterable = zip(keys, values)
  1159. >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
  1160. >>> [(k, ''.join(g)) for k, g in grouper]
  1161. [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]
  1162. Note that the order of items in the iterable is significant.
  1163. Only adjacent items are grouped together, so if you don't want any
  1164. duplicate groups, you should sort the iterable by the key function.
  1165. """
  1166. valuefunc = (lambda x: x) if valuefunc is None else valuefunc
  1167. return ((k, map(valuefunc, g)) for k, g in groupby(iterable, keyfunc))
  1168. def numeric_range(*args):
  1169. """An extension of the built-in ``range()`` function whose arguments can
  1170. be any orderable numeric type.
  1171. With only *stop* specified, *start* defaults to ``0`` and *step*
  1172. defaults to ``1``. The output items will match the type of *stop*:
  1173. >>> list(numeric_range(3.5))
  1174. [0.0, 1.0, 2.0, 3.0]
  1175. With only *start* and *stop* specified, *step* defaults to ``1``. The
  1176. output items will match the type of *start*:
  1177. >>> from decimal import Decimal
  1178. >>> start = Decimal('2.1')
  1179. >>> stop = Decimal('5.1')
  1180. >>> list(numeric_range(start, stop))
  1181. [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]
  1182. With *start*, *stop*, and *step* specified the output items will match
  1183. the type of ``start + step``:
  1184. >>> from fractions import Fraction
  1185. >>> start = Fraction(1, 2) # Start at 1/2
  1186. >>> stop = Fraction(5, 2) # End at 5/2
  1187. >>> step = Fraction(1, 2) # Count by 1/2
  1188. >>> list(numeric_range(start, stop, step))
  1189. [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]
  1190. If *step* is zero, ``ValueError`` is raised. Negative steps are supported:
  1191. >>> list(numeric_range(3, -1, -1.0))
  1192. [3.0, 2.0, 1.0, 0.0]
  1193. Be aware of the limitations of floating point numbers; the representation
  1194. of the yielded numbers may be surprising.
  1195. """
  1196. argc = len(args)
  1197. if argc == 1:
  1198. stop, = args
  1199. start = type(stop)(0)
  1200. step = 1
  1201. elif argc == 2:
  1202. start, stop = args
  1203. step = 1
  1204. elif argc == 3:
  1205. start, stop, step = args
  1206. else:
  1207. err_msg = 'numeric_range takes at most 3 arguments, got {}'
  1208. raise TypeError(err_msg.format(argc))
  1209. values = (start + (step * n) for n in count())
  1210. if step > 0:
  1211. return takewhile(partial(gt, stop), values)
  1212. elif step < 0:
  1213. return takewhile(partial(lt, stop), values)
  1214. else:
  1215. raise ValueError('numeric_range arg 3 must not be zero')
  1216. def count_cycle(iterable, n=None):
  1217. """Cycle through the items from *iterable* up to *n* times, yielding
  1218. the number of completed cycles along with each item. If *n* is omitted the
  1219. process repeats indefinitely.
  1220. >>> list(count_cycle('AB', 3))
  1221. [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]
  1222. """
  1223. iterable = tuple(iterable)
  1224. if not iterable:
  1225. return iter(())
  1226. counter = count() if n is None else range(n)
  1227. return ((i, item) for i in counter for item in iterable)
  1228. def locate(iterable, pred=bool, window_size=None):
  1229. """Yield the index of each item in *iterable* for which *pred* returns
  1230. ``True``.
  1231. *pred* defaults to :func:`bool`, which will select truthy items:
  1232. >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
  1233. [1, 2, 4]
  1234. Set *pred* to a custom function to, e.g., find the indexes for a particular
  1235. item.
  1236. >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
  1237. [1, 3]
  1238. If *window_size* is given, then the *pred* function will be called with
  1239. that many items. This enables searching for sub-sequences:
  1240. >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
  1241. >>> pred = lambda *args: args == (1, 2, 3)
  1242. >>> list(locate(iterable, pred=pred, window_size=3))
  1243. [1, 5, 9]
  1244. Use with :func:`seekable` to find indexes and then retrieve the associated
  1245. items:
  1246. >>> from itertools import count
  1247. >>> from more_itertools import seekable
  1248. >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
  1249. >>> it = seekable(source)
  1250. >>> pred = lambda x: x > 100
  1251. >>> indexes = locate(it, pred=pred)
  1252. >>> i = next(indexes)
  1253. >>> it.seek(i)
  1254. >>> next(it)
  1255. 106
  1256. """
  1257. if window_size is None:
  1258. return compress(count(), map(pred, iterable))
  1259. if window_size < 1:
  1260. raise ValueError('window size must be at least 1')
  1261. it = windowed(iterable, window_size, fillvalue=_marker)
  1262. return compress(count(), starmap(pred, it))
  1263. def lstrip(iterable, pred):
  1264. """Yield the items from *iterable*, but strip any from the beginning
  1265. for which *pred* returns ``True``.
  1266. For example, to remove a set of items from the start of an iterable:
  1267. >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
  1268. >>> pred = lambda x: x in {None, False, ''}
  1269. >>> list(lstrip(iterable, pred))
  1270. [1, 2, None, 3, False, None]
  1271. This function is analogous to to :func:`str.lstrip`, and is essentially
  1272. an wrapper for :func:`itertools.dropwhile`.
  1273. """
  1274. return dropwhile(pred, iterable)
  1275. def rstrip(iterable, pred):
  1276. """Yield the items from *iterable*, but strip any from the end
  1277. for which *pred* returns ``True``.
  1278. For example, to remove a set of items from the end of an iterable:
  1279. >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
  1280. >>> pred = lambda x: x in {None, False, ''}
  1281. >>> list(rstrip(iterable, pred))
  1282. [None, False, None, 1, 2, None, 3]
  1283. This function is analogous to :func:`str.rstrip`.
  1284. """
  1285. cache = []
  1286. cache_append = cache.append
  1287. for x in iterable:
  1288. if pred(x):
  1289. cache_append(x)
  1290. else:
  1291. for y in cache:
  1292. yield y
  1293. del cache[:]
  1294. yield x
  1295. def strip(iterable, pred):
  1296. """Yield the items from *iterable*, but strip any from the
  1297. beginning and end for which *pred* returns ``True``.
  1298. For example, to remove a set of items from both ends of an iterable:
  1299. >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
  1300. >>> pred = lambda x: x in {None, False, ''}
  1301. >>> list(strip(iterable, pred))
  1302. [1, 2, None, 3]
  1303. This function is analogous to :func:`str.strip`.
  1304. """
  1305. return rstrip(lstrip(iterable, pred), pred)
def islice_extended(iterable, *args):
    """An extension of :func:`itertools.islice` that supports negative values
    for *stop*, *start*, and *step*.

        >>> iterable = iter('abcdefgh')
        >>> list(islice_extended(iterable, -4, -1))
        ['e', 'f', 'g']

    Slices with negative values require some caching of *iterable*, but this
    function takes care to minimize the amount of memory required.

    For example, you can use a negative step with an infinite iterator:

        >>> from itertools import count
        >>> list(islice_extended(count(), 110, 99, -2))
        [110, 108, 106, 104, 102, 100]

    The positional *args* are interpreted the way :class:`slice` interprets
    them (``stop``; ``start, stop``; or ``start, stop, step``).

    A *step* of zero raises ``ValueError``. Because this is a generator
    function, that error surfaces when iteration begins rather than when the
    function is called.
    """
    # Let slice() sort out which positional argument is which.
    s = slice(*args)
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    step = s.step or 1

    it = iter(iterable)

    if step > 0:
        start = 0 if (start is None) else start

        if (start < 0):
            # Consume all but the last -start items
            # (each cached entry is a (1-based position, item) pair, so the
            # last entry's position is the total number of items seen)
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            for index, item in islice(cache, 0, n, step):
                yield item
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating
            cache = deque(islice(it, -stop), maxlen=-stop)

            # An item is released from the front of the cache only once a
            # further item has been read from the source.
            for index, item in enumerate(it):
                cached_item = cache.popleft()
                if index % step == 0:
                    yield cached_item
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            for item in islice(it, start, stop, step):
                yield item
    else:
        # Negative step: items are produced in reverse order.
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            # Only the items that can appear in the result are cached before
            # being sliced backwards.
            cache = list(islice(it, n))

            for item in cache[i::step]:
                yield item
  1402. def always_reversible(iterable):
  1403. """An extension of :func:`reversed` that supports all iterables, not
  1404. just those which implement the ``Reversible`` or ``Sequence`` protocols.
  1405. >>> print(*always_reversible(x for x in range(3)))
  1406. 2 1 0
  1407. If the iterable is already reversible, this function returns the
  1408. result of :func:`reversed()`. If the iterable is not reversible,
  1409. this function will cache the remaining items in the iterable and
  1410. yield them in reverse order, which may require significant storage.
  1411. """
  1412. try:
  1413. return reversed(iterable)
  1414. except TypeError:
  1415. return reversed(list(iterable))
  1416. def consecutive_groups(iterable, ordering=lambda x: x):
  1417. """Yield groups of consecutive items using :func:`itertools.groupby`.
  1418. The *ordering* function determines whether two items are adjacent by
  1419. returning their position.
  1420. By default, the ordering function is the identity function. This is
  1421. suitable for finding runs of numbers:
  1422. >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
  1423. >>> for group in consecutive_groups(iterable):
  1424. ... print(list(group))
  1425. [1]
  1426. [10, 11, 12]
  1427. [20]
  1428. [30, 31, 32, 33]
  1429. [40]
  1430. For finding runs of adjacent letters, try using the :meth:`index` method
  1431. of a string of letters:
  1432. >>> from string import ascii_lowercase
  1433. >>> iterable = 'abcdfgilmnop'
  1434. >>> ordering = ascii_lowercase.index
  1435. >>> for group in consecutive_groups(iterable, ordering):
  1436. ... print(list(group))
  1437. ['a', 'b', 'c', 'd']
  1438. ['f', 'g']
  1439. ['i']
  1440. ['l', 'm', 'n', 'o', 'p']
  1441. """
  1442. for k, g in groupby(
  1443. enumerate(iterable), key=lambda x: x[0] - ordering(x[1])
  1444. ):
  1445. yield map(itemgetter(1), g)
  1446. def difference(iterable, func=sub):
  1447. """By default, compute the first difference of *iterable* using
  1448. :func:`operator.sub`.
  1449. >>> iterable = [0, 1, 3, 6, 10]
  1450. >>> list(difference(iterable))
  1451. [0, 1, 2, 3, 4]
  1452. This is the opposite of :func:`accumulate`'s default behavior:
  1453. >>> from more_itertools import accumulate
  1454. >>> iterable = [0, 1, 2, 3, 4]
  1455. >>> list(accumulate(iterable))
  1456. [0, 1, 3, 6, 10]
  1457. >>> list(difference(accumulate(iterable)))
  1458. [0, 1, 2, 3, 4]
  1459. By default *func* is :func:`operator.sub`, but other functions can be
  1460. specified. They will be applied as follows::
  1461. A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...
  1462. For example, to do progressive division:
  1463. >>> iterable = [1, 2, 6, 24, 120] # Factorial sequence
  1464. >>> func = lambda x, y: x // y
  1465. >>> list(difference(iterable, func))
  1466. [1, 2, 3, 4, 5]
  1467. """
  1468. a, b = tee(iterable)
  1469. try:
  1470. item = next(b)
  1471. except StopIteration:
  1472. return iter([])
  1473. return chain([item], map(lambda x: func(x[1], x[0]), zip(a, b)))
  1474. class SequenceView(Sequence):
  1475. """Return a read-only view of the sequence object *target*.
  1476. :class:`SequenceView` objects are analogous to Python's built-in
  1477. "dictionary view" types. They provide a dynamic view of a sequence's items,
  1478. meaning that when the sequence updates, so does the view.
  1479. >>> seq = ['0', '1', '2']
  1480. >>> view = SequenceView(seq)
  1481. >>> view
  1482. SequenceView(['0', '1', '2'])
  1483. >>> seq.append('3')
  1484. >>> view
  1485. SequenceView(['0', '1', '2', '3'])
  1486. Sequence views support indexing, slicing, and length queries. They act
  1487. like the underlying sequence, except they don't allow assignment:
  1488. >>> view[1]
  1489. '1'
  1490. >>> view[1:-1]
  1491. ['1', '2']
  1492. >>> len(view)
  1493. 4
  1494. Sequence views are useful as an alternative to copying, as they don't
  1495. require (much) extra storage.
  1496. """
  1497. def __init__(self, target):
  1498. if not isinstance(target, Sequence):
  1499. raise TypeError
  1500. self._target = target
  1501. def __getitem__(self, index):
  1502. return self._target[index]
  1503. def __len__(self):
  1504. return len(self._target)
  1505. def __repr__(self):
  1506. return '{}({})'.format(self.__class__.__name__, repr(self._target))
  1507. class seekable(object):
  1508. """Wrap an iterator to allow for seeking backward and forward. This
  1509. progressively caches the items in the source iterable so they can be
  1510. re-visited.
  1511. Call :meth:`seek` with an index to seek to that position in the source
  1512. iterable.
  1513. To "reset" an iterator, seek to ``0``:
  1514. >>> from itertools import count
  1515. >>> it = seekable((str(n) for n in count()))
  1516. >>> next(it), next(it), next(it)
  1517. ('0', '1', '2')
  1518. >>> it.seek(0)
  1519. >>> next(it), next(it), next(it)
  1520. ('0', '1', '2')
  1521. >>> next(it)
  1522. '3'
  1523. You can also seek forward:
  1524. >>> it = seekable((str(n) for n in range(20)))
  1525. >>> it.seek(10)
  1526. >>> next(it)
  1527. '10'
  1528. >>> it.seek(20) # Seeking past the end of the source isn't a problem
  1529. >>> list(it)
  1530. []
  1531. >>> it.seek(0) # Resetting works even after hitting the end
  1532. >>> next(it), next(it), next(it)
  1533. ('0', '1', '2')
  1534. The cache grows as the source iterable progresses, so beware of wrapping
  1535. very large or infinite iterables.
  1536. You may view the contents of the cache with the :meth:`elements` method.
  1537. That returns a :class:`SequenceView`, a view that updates automatically:
  1538. >>> it = seekable((str(n) for n in range(10)))
  1539. >>> next(it), next(it), next(it)
  1540. ('0', '1', '2')
  1541. >>> elements = it.elements()
  1542. >>> elements
  1543. SequenceView(['0', '1', '2'])
  1544. >>> next(it)
  1545. '3'
  1546. >>> elements
  1547. SequenceView(['0', '1', '2', '3'])
  1548. """
  1549. def __init__(self, iterable):
  1550. self._source = iter(iterable)
  1551. self._cache = []
  1552. self._index = None
  1553. def __iter__(self):
  1554. return self
  1555. def __next__(self):
  1556. if self._index is not None:
  1557. try:
  1558. item = self._cache[self._index]
  1559. except IndexError:
  1560. self._index = None
  1561. else:
  1562. self._index += 1
  1563. return item
  1564. item = next(self._source)
  1565. self._cache.append(item)
  1566. return item
  1567. next = __next__
  1568. def elements(self):
  1569. return SequenceView(self._cache)
  1570. def seek(self, index):
  1571. self._index = index
  1572. remainder = index - len(self._cache)
  1573. if remainder > 0:
  1574. consume(self, remainder)
  1575. class run_length(object):
  1576. """
  1577. :func:`run_length.encode` compresses an iterable with run-length encoding.
  1578. It yields groups of repeated items with the count of how many times they
  1579. were repeated:
  1580. >>> uncompressed = 'abbcccdddd'
  1581. >>> list(run_length.encode(uncompressed))
  1582. [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
  1583. :func:`run_length.decode` decompresses an iterable that was previously
  1584. compressed with run-length encoding. It yields the items of the
  1585. decompressed iterable:
  1586. >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
  1587. >>> list(run_length.decode(compressed))
  1588. ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']
  1589. """
  1590. @staticmethod
  1591. def encode(iterable):
  1592. return ((k, ilen(g)) for k, g in groupby(iterable))
  1593. @staticmethod
  1594. def decode(iterable):
  1595. return chain.from_iterable(repeat(k, n) for k, n in iterable)
  1596. def exactly_n(iterable, n, predicate=bool):
  1597. """Return ``True`` if exactly ``n`` items in the iterable are ``True``
  1598. according to the *predicate* function.
  1599. >>> exactly_n([True, True, False], 2)
  1600. True
  1601. >>> exactly_n([True, True, False], 1)
  1602. False
  1603. >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
  1604. True
  1605. The iterable will be advanced until ``n + 1`` truthy items are encountered,
  1606. so avoid calling it on infinite iterables.
  1607. """
  1608. return len(take(n + 1, filter(predicate, iterable))) == n
  1609. def circular_shifts(iterable):
  1610. """Return a list of circular shifts of *iterable*.
  1611. >>> circular_shifts(range(4))
  1612. [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
  1613. """
  1614. lst = list(iterable)
  1615. return take(len(lst), windowed(cycle(lst), len(lst)))
  1616. def make_decorator(wrapping_func, result_index=0):
  1617. """Return a decorator version of *wrapping_func*, which is a function that
  1618. modifies an iterable. *result_index* is the position in that function's
  1619. signature where the iterable goes.
  1620. This lets you use itertools on the "production end," i.e. at function
  1621. definition. This can augment what the function returns without changing the
  1622. function's code.
  1623. For example, to produce a decorator version of :func:`chunked`:
  1624. >>> from more_itertools import chunked
  1625. >>> chunker = make_decorator(chunked, result_index=0)
  1626. >>> @chunker(3)
  1627. ... def iter_range(n):
  1628. ... return iter(range(n))
  1629. ...
  1630. >>> list(iter_range(9))
  1631. [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
  1632. To only allow truthy items to be returned:
  1633. >>> truth_serum = make_decorator(filter, result_index=1)
  1634. >>> @truth_serum(bool)
  1635. ... def boolean_test():
  1636. ... return [0, 1, '', ' ', False, True]
  1637. ...
  1638. >>> list(boolean_test())
  1639. [1, ' ', True]
  1640. The :func:`peekable` and :func:`seekable` wrappers make for practical
  1641. decorators:
  1642. >>> from more_itertools import peekable
  1643. >>> peekable_function = make_decorator(peekable)
  1644. >>> @peekable_function()
  1645. ... def str_range(*args):
  1646. ... return (str(x) for x in range(*args))
  1647. ...
  1648. >>> it = str_range(1, 20, 2)
  1649. >>> next(it), next(it), next(it)
  1650. ('1', '3', '5')
  1651. >>> it.peek()
  1652. '7'
  1653. >>> next(it)
  1654. '7'
  1655. """
  1656. # See https://sites.google.com/site/bbayles/index/decorator_factory for
  1657. # notes on how this works.
  1658. def decorator(*wrapping_args, **wrapping_kwargs):
  1659. def outer_wrapper(f):
  1660. def inner_wrapper(*args, **kwargs):
  1661. result = f(*args, **kwargs)
  1662. wrapping_args_ = list(wrapping_args)
  1663. wrapping_args_.insert(result_index, result)
  1664. return wrapping_func(*wrapping_args_, **wrapping_kwargs)
  1665. return inner_wrapper
  1666. return outer_wrapper
  1667. return decorator
  1668. def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
  1669. """Return a dictionary that maps the items in *iterable* to categories
  1670. defined by *keyfunc*, transforms them with *valuefunc*, and
  1671. then summarizes them by category with *reducefunc*.
  1672. *valuefunc* defaults to the identity function if it is unspecified.
  1673. If *reducefunc* is unspecified, no summarization takes place:
  1674. >>> keyfunc = lambda x: x.upper()
  1675. >>> result = map_reduce('abbccc', keyfunc)
  1676. >>> sorted(result.items())
  1677. [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]
  1678. Specifying *valuefunc* transforms the categorized items:
  1679. >>> keyfunc = lambda x: x.upper()
  1680. >>> valuefunc = lambda x: 1
  1681. >>> result = map_reduce('abbccc', keyfunc, valuefunc)
  1682. >>> sorted(result.items())
  1683. [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]
  1684. Specifying *reducefunc* summarizes the categorized items:
  1685. >>> keyfunc = lambda x: x.upper()
  1686. >>> valuefunc = lambda x: 1
  1687. >>> reducefunc = sum
  1688. >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc)
  1689. >>> sorted(result.items())
  1690. [('A', 1), ('B', 2), ('C', 3)]
  1691. You may want to filter the input iterable before applying the map/reduce
  1692. procedure:
  1693. >>> all_items = range(30)
  1694. >>> items = [x for x in all_items if 10 <= x <= 20] # Filter
  1695. >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1
  1696. >>> categories = map_reduce(items, keyfunc=keyfunc)
  1697. >>> sorted(categories.items())
  1698. [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])]
  1699. >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum)
  1700. >>> sorted(summaries.items())
  1701. [(0, 90), (1, 75)]
  1702. Note that all items in the iterable are gathered into a list before the
  1703. summarization step, which may require significant storage.
  1704. The returned object is a :obj:`collections.defaultdict` with the
  1705. ``default_factory`` set to ``None``, such that it behaves like a normal
  1706. dictionary.
  1707. """
  1708. valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc
  1709. ret = defaultdict(list)
  1710. for item in iterable:
  1711. key = keyfunc(item)
  1712. value = valuefunc(item)
  1713. ret[key].append(value)
  1714. if reducefunc is not None:
  1715. for key, value_list in ret.items():
  1716. ret[key] = reducefunc(value_list)
  1717. ret.default_factory = None
  1718. return ret
  1719. def rlocate(iterable, pred=bool, window_size=None):
  1720. """Yield the index of each item in *iterable* for which *pred* returns
  1721. ``True``, starting from the right and moving left.
  1722. *pred* defaults to :func:`bool`, which will select truthy items:
  1723. >>> list(rlocate([0, 1, 1, 0, 1, 0, 0])) # Truthy at 1, 2, and 4
  1724. [4, 2, 1]
  1725. Set *pred* to a custom function to, e.g., find the indexes for a particular
  1726. item:
  1727. >>> iterable = iter('abcb')
  1728. >>> pred = lambda x: x == 'b'
  1729. >>> list(rlocate(iterable, pred))
  1730. [3, 1]
  1731. If *window_size* is given, then the *pred* function will be called with
  1732. that many items. This enables searching for sub-sequences:
  1733. >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
  1734. >>> pred = lambda *args: args == (1, 2, 3)
  1735. >>> list(rlocate(iterable, pred=pred, window_size=3))
  1736. [9, 5, 1]
  1737. Beware, this function won't return anything for infinite iterables.
  1738. If *iterable* is reversible, ``rlocate`` will reverse it and search from
  1739. the right. Otherwise, it will search from the left and return the results
  1740. in reverse order.
  1741. See :func:`locate` to for other example applications.
  1742. """
  1743. if window_size is None:
  1744. try:
  1745. len_iter = len(iterable)
  1746. return (
  1747. len_iter - i - 1 for i in locate(reversed(iterable), pred)
  1748. )
  1749. except TypeError:
  1750. pass
  1751. return reversed(list(locate(iterable, pred, window_size)))
  1752. def replace(iterable, pred, substitutes, count=None, window_size=1):
  1753. """Yield the items from *iterable*, replacing the items for which *pred*
  1754. returns ``True`` with the items from the iterable *substitutes*.
  1755. >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
  1756. >>> pred = lambda x: x == 0
  1757. >>> substitutes = (2, 3)
  1758. >>> list(replace(iterable, pred, substitutes))
  1759. [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]
  1760. If *count* is given, the number of replacements will be limited:
  1761. >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
  1762. >>> pred = lambda x: x == 0
  1763. >>> substitutes = [None]
  1764. >>> list(replace(iterable, pred, substitutes, count=2))
  1765. [1, 1, None, 1, 1, None, 1, 1, 0]
  1766. Use *window_size* to control the number of items passed as arguments to
  1767. *pred*. This allows for locating and replacing subsequences.
  1768. >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
  1769. >>> window_size = 3
  1770. >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred
  1771. >>> substitutes = [3, 4] # Splice in these items
  1772. >>> list(replace(iterable, pred, substitutes, window_size=window_size))
  1773. [3, 4, 5, 3, 4, 5]
  1774. """
  1775. if window_size < 1:
  1776. raise ValueError('window_size must be at least 1')
  1777. # Save the substitutes iterable, since it's used more than once
  1778. substitutes = tuple(substitutes)
  1779. # Add padding such that the number of windows matches the length of the
  1780. # iterable
  1781. it = chain(iterable, [_marker] * (window_size - 1))
  1782. windows = windowed(it, window_size)
  1783. n = 0
  1784. for w in windows:
  1785. # If the current window matches our predicate (and we haven't hit
  1786. # our maximum number of replacements), splice in the substitutes
  1787. # and then consume the following windows that overlap with this one.
  1788. # For example, if the iterable is (0, 1, 2, 3, 4...)
  1789. # and the window size is 2, we have (0, 1), (1, 2), (2, 3)...
  1790. # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2)
  1791. if pred(*w):
  1792. if (count is None) or (n < count):
  1793. n += 1
  1794. for s in substitutes:
  1795. yield s
  1796. consume(windows, window_size - 1)
  1797. continue
  1798. # If there was no match (or we've reached the replacement limit),
  1799. # yield the first item from the window.
  1800. if w and (w[0] is not _marker):
  1801. yield w[0]