172 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			172 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from collections import defaultdict, deque
 | |
| from itertools import filterfalse
 | |
| 
 | |
| 
 | |
def unique_everseen(iterable, key=None):
    """Yield unique elements, preserving order. Remember all elements ever seen.

        >>> list(unique_everseen('AAAABBBCCDAABBB'))
        ['A', 'B', 'C', 'D']
        >>> list(unique_everseen('ABBCcAD', str.lower))
        ['A', 'B', 'C', 'D']

    If *key* is given, uniqueness is decided by ``key(element)`` while the
    original element is what gets yielded.

    Hashable elements (or keys) are tracked in a set. Unhashable ones fall
    back to a list, which is slower (linear membership test) but avoids a
    ``TypeError`` in the middle of iteration::

        >>> list(unique_everseen([[1], [1], [2]]))
        [[1], [2]]
    """
    seen = set()
    seen_add = seen.add
    seen_unhashable = []
    use_key = key is not None

    for element in iterable:
        k = key(element) if use_key else element
        try:
            is_new = k not in seen
            if is_new:
                seen_add(k)
        except TypeError:
            # k is unhashable: the set membership test itself raised, so
            # track it in the (slower) list instead.
            is_new = k not in seen_unhashable
            if is_new:
                seen_unhashable.append(k)
        if is_new:
            yield element
 | |
| 
 | |
| 
 | |
| # copied from more_itertools 8.8
 | |
def always_iterable(obj, base_type=(str, bytes)):
    """If *obj* is iterable, return an iterator over its items::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    If *obj* is not iterable, return a one-item iterable containing *obj*::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    If *obj* is ``None``, return an empty iterable::

        >>> obj = None
        >>> list(always_iterable(obj))
        []

    By default, binary and text strings are not considered iterable::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
    returns ``True`` won't be considered iterable::

        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Set *base_type* to ``None`` to avoid any special handling and treat objects
    Python considers iterable as iterable::

        >>> obj = 'foo'
        >>> list(always_iterable(obj, base_type=None))
        ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    # Instances of base_type are treated as a single item even though
    # Python could iterate over them.
    if (base_type is not None) and isinstance(obj, base_type):
        return iter((obj,))

    try:
        return iter(obj)
    except TypeError:
        # Not iterable at all: wrap it as a single item.
        return iter((obj,))
 | |
| 
 | |
| 
 | |
| # Copied from more_itertools 10.3
 | |
class bucket:
    """Wrap *iterable* and return an object that buckets the iterable into
    child iterables based on a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    By default, attempting to select a bucket to which no items belong will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    """

    def __init__(self, iterable, key, validator=None):
        """Store the source iterator, the *key* function and the optional
        *validator* (defaults to accepting every bucket value)."""
        self._it = iter(iterable)
        self._key = key
        # Maps bucket value -> deque of items seen but not yet consumed.
        self._cache = defaultdict(deque)
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        """Return True if at least one unconsumed item belongs to *value*.

        This may advance the source iterator (caching what it passes) until
        a matching item is found or the source is exhausted.
        """
        if not self._validator(value):
            return False

        try:
            item = next(self[value])
        except StopIteration:
            return False

        # The probe consumed one item; put it back at the front so the
        # bucket's order is unchanged for later consumers.
        self._cache[value].appendleft(item)
        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            # Use .get() rather than indexing: indexing a defaultdict would
            # insert an empty bucket for values that never occur, and
            # __iter__ would then report them as keys.
            cached = self._cache.get(value)
            if cached:
                # Emit the oldest cached match and evict it from the cache.
                yield cached.popleft()
                continue

            # Otherwise advance the parent iterator to search for a
            # matching item, caching the rest.
            for item in self._it:
                item_value = self._key(item)
                if item_value == value:
                    yield item
                    break
                if self._validator(item_value):
                    self._cache[item_value].append(item)
            else:
                # Parent iterator exhausted without another match.
                return

    def __iter__(self):
        """Yield the bucket values that still have unconsumed items.

        Exhausts the source iterator, caching every item whose bucket value
        passes the validator. Buckets with no remaining items are skipped,
        which keeps the result consistent with ``value in self``.
        """
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        # Snapshot the keys so callers may touch buckets (which can mutate
        # the cache) while looping over the result.
        yield from [k for k, items in self._cache.items() if items]

    def __getitem__(self, value):
        """Return an iterator over the items whose key equals *value*.

        Values rejected by the validator yield an empty iterator without
        touching the source iterable.
        """
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
 |