123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- from collections import defaultdict, deque
- from itertools import filterfalse
def unique_everseen(iterable, key=None):
    """Yield each distinct element of *iterable* once, in first-seen order.

    Every element (or, when *key* is given, every ``key(element)``) that has
    been produced so far is remembered, so the elements or key results must
    be hashable.

    >>> list(unique_everseen('AAAABBBCCDAABBB'))
    ['A', 'B', 'C', 'D']
    >>> list(unique_everseen('ABBCcAD', str.lower))
    ['A', 'B', 'C', 'D']
    """
    observed = set()
    for candidate in iterable:
        # Without a key function the element is its own identity marker.
        marker = candidate if key is None else key(candidate)
        if marker in observed:
            continue
        observed.add(marker)
        yield candidate
- # copied from more_itertools 8.8
def always_iterable(obj, base_type=(str, bytes)):
    """Return an iterator for *obj*, wrapping it when it is not iterable.

    An iterable *obj* is iterated over as usual:

        >>> list(always_iterable((1, 2, 3)))
        [1, 2, 3]

    A non-iterable *obj* becomes a one-item iterator:

        >>> list(always_iterable(1))
        [1]

    ``None`` becomes an empty iterator:

        >>> list(always_iterable(None))
        []

    Instances of *base_type* (text and binary strings by default) are treated
    as a single item rather than being iterated:

        >>> list(always_iterable('foo'))
        ['foo']
        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Pass ``base_type=None`` to disable that special handling and iterate
    anything Python itself considers iterable:

        >>> list(always_iterable('foo', base_type=None))
        ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    treat_as_unit = base_type is not None and isinstance(obj, base_type)
    if not treat_as_unit:
        try:
            return iter(obj)
        except TypeError:
            # Not iterable after all: fall through and wrap it.
            pass
    return iter((obj,))
- # Copied from more_itertools 10.3
class bucket:
    """Wrap *iterable* and return an object that buckets the iterable into
    child iterables based on a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    By default, attempting to select a bucket to which no items belong will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    Iterating over the bucket object yields only key values that actually
    occurred in the source iterable; probing for a key that never occurs
    does not make it show up later.

        >>> s = bucket(['a1', 'b1'], key=lambda x: x[0])
        >>> 'z' in s
        False
        >>> sorted(s)
        ['a', 'b']
    """

    def __init__(self, iterable, key, validator=None):
        self._it = iter(iterable)
        self._key = key
        # Maps a key value to the items already read from the source for that
        # key but not yet handed out by a child iterable.
        self._cache = defaultdict(deque)
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        """Return True if at least one unconsumed item belongs to *value*."""
        if not self._validator(value):
            return False

        try:
            item = next(self[value])
        except StopIteration:
            return False
        else:
            # Put the item back at the front so the membership test does not
            # consume it.
            self._cache[value].appendleft(item)

        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            # Use .get() rather than indexing: indexing the defaultdict would
            # create an empty entry for *value* even if no such item exists,
            # and __iter__ would later report it as a real key.
            pending = self._cache.get(value)
            if pending:
                # Emit the oldest cached match and evict it from the cache.
                yield pending.popleft()
            else:
                # Advance the parent iterator to search for a matching item,
                # caching the rest.
                while True:
                    try:
                        item = next(self._it)
                    except StopIteration:
                        return
                    item_value = self._key(item)
                    if item_value == value:
                        # The item bypasses the cache, so record the key
                        # explicitly; __iter__ lists keys from the cache.
                        if value not in self._cache:
                            self._cache[value] = deque()
                        yield item
                        break
                    elif self._validator(item_value):
                        self._cache[item_value].append(item)

    def __iter__(self):
        """Exhaust the source, caching valid items, then yield the keys seen."""
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        yield from self._cache.keys()

    def __getitem__(self, value):
        """Return an iterator over the items whose key equals *value*."""
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
|