# Source: mirror of https://github.com/scrapy/scrapy.git
# (synced 2025-02-26 06:43:44 +00:00)
# Original listing: 371 lines, 11 KiB, Python

from collections import deque, defaultdict
from heapq import heappush, heappop
import time
from itertools import chain
#------------------------------------------------------------------------------
class PriorityQueue1(object):
    """heapq

    A simple priority queue backed by a single binary heap.

    Entries are stored as ``(priority, timestamp, item)`` tuples, so the
    entry with the lowest priority value is popped first and, among equal
    priorities, the one with the earliest ``time.time()`` stamp wins.
    """
    # NOTE(review): the first docstring line looks like the label printed in
    # the profiling output -- keep it verbatim.

    def __init__(self, size=1):
        # ``size`` is accepted but not used by this implementation.
        self.items = []

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        # If two entries share both priority and timestamp, tuple comparison
        # falls through to comparing the items themselves.
        heappush(self.items, (priority, time.time(), item))

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest entry.

        Raises IndexError (propagated from ``heappop``) on an empty queue.
        """
        priority, _, item = heappop(self.items)
        return item, priority

    def __len__(self):
        """Return the number of queued items."""
        return len(self.items)

    def __iter__(self):
        """Iterate over ``(item, priority)`` pairs without consuming them.

        Pairs are ordered by priority and then by push timestamp. The raw
        heap list only guarantees that its first element is the smallest,
        so it is sorted here instead of being walked as stored.
        """
        ordered = sorted(self.items, key=lambda entry: entry[:2])
        return ((item, priority) for priority, _, item in ordered)

    def __nonzero__(self):
        """Return True when at least one item is queued."""
        return bool(self.items)

    # Python 3 looks up ``__bool__``; ``__nonzero__`` is the Python 2 name.
    __bool__ = __nonzero__
class PriorityQueue1b(PriorityQueue1):
    """heapq+int

    A simple priority queue using an incremental integer instead of
    time.time() as the tie-breaker between entries of equal priority.
    """
    # NOTE(review): the first docstring line looks like the label printed in
    # the profiling output -- keep it verbatim.

    # Class-level starting value; the first ``push`` on an instance rebinds
    # it as that instance's own counter.
    time = 0

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        self.time += 1
        # Store the counter value (``self.time``). The bare name ``time``
        # would resolve to the imported ``time`` module, putting a module
        # object into the heap tuple and breaking FIFO order among equal
        # priorities.
        heappush(self.items, (priority, self.time, item))
#------------------------------------------------------------------------------
class PriorityQueue2(object):
    """dict+deque

    One deque per priority value, held in a ``defaultdict``. New items
    enter on the left of their deque and are popped from the right, so
    items of equal priority leave in the order they were pushed.

    @author: Federico Feroldi <federico@cloudify.me>
    """
    # NOTE(review): the first docstring line looks like the label printed in
    # the profiling output -- keep it verbatim.

    def __init__(self, size=1):
        # ``size`` is accepted but not used by this implementation.
        self.items = defaultdict(deque)

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        self.items[priority].appendleft(item)

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest priority.

        Raises IndexError when no item is queued.
        """
        # ``sorted`` works on Python 2 lists and Python 3 dict views alike;
        # ``keys().sort()`` only works where ``keys()`` returns a list.
        for priority in sorted(self.items):
            bucket = self.items[priority]
            # Drained deques are left in the dict, so skip empty ones.
            if bucket:
                return (bucket.pop(), priority)
        raise IndexError("pop from an empty queue")

    def __len__(self):
        """Return the total number of queued items."""
        return sum(len(bucket) for bucket in self.items.values())

    def __iter__(self):
        """Yield ``(item, priority)`` pairs in the order ``pop`` returns them."""
        for priority in sorted(self.items):
            # Items are appended on the left and popped from the right, so
            # the oldest item is at the right end: walk right-to-left.
            for item in reversed(self.items[priority]):
                yield (item, priority)

    def __nonzero__(self):
        """Return True when at least one item is queued."""
        return any(self.items.values())

    # Python 3 looks up ``__bool__``; ``__nonzero__`` is the Python 2 name.
    __bool__ = __nonzero__
#------------------------------------------------------------------------------
class PriorityQueue3(object):
    """deque+heapq"""
    # NOTE(review): the docstring looks like the label printed in the
    # profiling output -- keep it verbatim.
    #
    # Priority 0 is kept in a plain deque; negative and positive priorities
    # each live in their own heap of ``(priority, timestamp, item)`` tuples.
    # Negative entries are popped first, then priority 0, then positive.

    def __init__(self, size=1):
        # ``size`` is accepted but not used by this implementation.
        self.negitems = []
        self.pzero = deque()
        self.positems = []

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        if priority == 0:
            self.pzero.appendleft(item)
        elif priority < 0:
            heappush(self.negitems, (priority, time.time(), item))
        else:
            heappush(self.positems, (priority, time.time(), item))

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest entry.

        Raises IndexError (propagated from ``heappop``) on an empty queue.
        """
        if self.negitems:
            priority, _, item = heappop(self.negitems)
            return item, priority
        if self.pzero:
            return (self.pzero.pop(), 0)
        priority, _, item = heappop(self.positems)
        return item, priority

    def __len__(self):
        """Return the total number of queued items."""
        return len(self.negitems) + len(self.pzero) + len(self.positems)

    def __iter__(self):
        """Yield ``(item, priority)`` pairs ordered as ``pop`` returns them."""
        def by_priority_then_stamp(entry):
            return entry[:2]

        # A heap list only guarantees its first element is the smallest,
        # so sort before walking it.
        for priority, _, item in sorted(self.negitems,
                                        key=by_priority_then_stamp):
            yield (item, priority)
        # The zero deque is filled on the left and drained from the right.
        for item in reversed(self.pzero):
            yield (item, 0)
        for priority, _, item in sorted(self.positems,
                                        key=by_priority_then_stamp):
            yield (item, priority)

    def __nonzero__(self):
        """Return True when at least one item is queued."""
        # ``or``: the queue is non-empty as soon as ANY container holds an
        # item. ``and`` would require all three to be non-empty at once.
        return bool(self.negitems or self.pzero or self.positems)

    # Python 3 looks up ``__bool__``; ``__nonzero__`` is the Python 2 name.
    __bool__ = __nonzero__
class PriorityQueue3b(PriorityQueue3):
    """deque+heapq+int"""
    # Variant of the parent class that breaks ties between equal priorities
    # with per-heap integer counters instead of ``time.time()``.

    # Class-level starting values; the first increment on an instance
    # rebinds each one as an instance attribute.
    left_time = 0   # counter for the negative-priority heap
    right_time = 0  # counter for the positive-priority heap

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        if priority == 0:
            self.pzero.appendleft(item)
            return
        if priority < 0:
            self.left_time += 1
            entry = (priority, self.left_time, item)
            heappush(self.negitems, entry)
            return
        self.right_time += 1
        entry = (priority, self.right_time, item)
        heappush(self.positems, entry)
#------------------------------------------------------------------------------
class PriorityQueue4(object):
    """deque+defaultdict+deque"""
    # NOTE(review): the docstring looks like the label printed in the
    # profiling output -- keep it verbatim.
    #
    # Priority 0 is kept in a plain deque; negative and positive priorities
    # each get a ``defaultdict`` mapping priority -> deque. Items enter on
    # the left of a deque and leave from the right.

    def __init__(self, size=1):
        # ``size`` is accepted but not used by this implementation.
        self.negitems = defaultdict(deque)
        self.pzero = deque()
        self.positems = defaultdict(deque)

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        if priority == 0:
            self.pzero.appendleft(item)
        elif priority < 0:
            self.negitems[priority].appendleft(item)
        else:
            self.positems[priority].appendleft(item)

    @staticmethod
    def _pop_lowest(buckets):
        """Pop from the lowest-priority non-empty deque in ``buckets``.

        Returns ``(item, priority)``, or None when every deque is empty.
        A deque emptied by the pop is removed from ``buckets``.
        """
        # ``sorted`` works on Python 2 lists and Python 3 dict views alike;
        # it also snapshots the keys, so deleting inside the loop is safe.
        for priority in sorted(buckets):
            bucket = buckets[priority]
            if bucket:
                popped = (bucket.pop(), priority)
                if not bucket:
                    del buckets[priority]
                return popped
        return None

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest priority.

        Raises IndexError when no item is queued.
        """
        popped = self._pop_lowest(self.negitems)
        if popped is not None:
            return popped
        # Falling through (rather than ``elif``) means a ``negitems`` dict
        # holding only empty deques cannot hide queued items elsewhere.
        if self.pzero:
            return (self.pzero.pop(), 0)
        popped = self._pop_lowest(self.positems)
        if popped is not None:
            return popped
        raise IndexError("pop from an empty queue")

    def __len__(self):
        """Return the total number of queued items."""
        return (sum(len(v) for v in self.negitems.values())
                + len(self.pzero)
                + sum(len(v) for v in self.positems.values()))

    def __iter__(self):
        """Iterate ``(item, priority)`` pairs in the order ``pop`` uses."""
        # Every deque is walked right-to-left because the right end holds
        # the oldest item, which is the next one ``pop`` would return.
        gen_negs = ((i, priority)
                    for priority in sorted(self.negitems)
                    for i in reversed(self.negitems[priority]))
        gen_zeros = ((item, 0) for item in reversed(self.pzero))
        gen_pos = ((i, priority)
                   for priority in sorted(self.positems)
                   for i in reversed(self.positems[priority]))
        return chain(gen_negs, gen_zeros, gen_pos)

    def __nonzero__(self):
        """Return True when any of the three containers is non-empty."""
        return bool(self.negitems or self.pzero or self.positems)

    # Python 3 looks up ``__bool__``; ``__nonzero__`` is the Python 2 name.
    __bool__ = __nonzero__
class PriorityQueue4b(object):
    """deque+defaultdict+deque+cache"""
    # NOTE(review): the docstring looks like the label printed in the
    # profiling output -- keep it verbatim.
    #
    # Priority 0 is kept in a plain deque; negative and positive priorities
    # each get a ``defaultdict`` mapping priority -> deque. The sorted list
    # of priorities for each side is cached and only rebuilt after the set
    # of keys changed (a new priority was pushed or a deque was drained).

    def __init__(self, size=1):
        # ``size`` is accepted but not used by this implementation.
        self.negitems = defaultdict(deque)
        self.pzero = deque()
        self.positems = defaultdict(deque)
        # "dirty" flags: True means the matching cached list is stale.
        self._sort_neg = False
        self._sort_pos = False
        self._cached_sorted_negitems = []
        self._cached_sorted_positems = []

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first)."""
        if priority == 0:
            self.pzero.appendleft(item)
        elif priority < 0:
            if priority not in self.negitems:
                self._sort_neg = True  # new key: cached order is stale
            self.negitems[priority].appendleft(item)
        else:
            if priority not in self.positems:
                self._sort_pos = True  # new key: cached order is stale
            self.positems[priority].appendleft(item)

    def _neg_priorities(self):
        """Return the sorted negative priorities, re-sorting only if stale."""
        if self._sort_neg:
            self._cached_sorted_negitems = sorted(self.negitems)
            self._sort_neg = False
        return self._cached_sorted_negitems

    def _pos_priorities(self):
        """Return the sorted positive priorities, re-sorting only if stale."""
        if self._sort_pos:
            self._cached_sorted_positems = sorted(self.positems)
            self._sort_pos = False
        return self._cached_sorted_positems

    @staticmethod
    def _pop_first(buckets, priorities):
        """Pop from the first non-empty deque following ``priorities``.

        Returns ``((item, priority), emptied)`` where ``emptied`` tells
        whether the deque was drained and removed from ``buckets``, or
        None when no deque held an item.
        """
        for priority in priorities:
            # ``get`` avoids creating a fresh deque for an unknown key.
            bucket = buckets.get(priority)
            if bucket:
                popped = (bucket.pop(), priority)
                if bucket:
                    return popped, False
                del buckets[priority]
                return popped, True
        return None

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest priority.

        Raises IndexError when no item is queued.
        """
        if self.negitems:
            found = self._pop_first(self.negitems, self._neg_priorities())
            if found is not None:
                popped, emptied = found
                if emptied:
                    self._sort_neg = True  # key removed: cache is stale
                return popped
        if self.pzero:
            return (self.pzero.pop(), 0)
        if self.positems:
            # The cached order is used as-is here; re-sorting the keys on
            # every call would make the positive-side cache pointless.
            found = self._pop_first(self.positems, self._pos_priorities())
            if found is not None:
                popped, emptied = found
                if emptied:
                    self._sort_pos = True  # key removed: cache is stale
                return popped
        raise IndexError("pop from an empty queue")

    def __len__(self):
        """Return the total number of queued items."""
        return (sum(len(v) for v in self.negitems.values())
                + len(self.pzero)
                + sum(len(v) for v in self.positems.values()))

    def __iter__(self):
        """Iterate ``(item, priority)`` pairs in the order ``pop`` uses."""
        # Every deque is walked right-to-left because the right end holds
        # the oldest item, which is the next one ``pop`` would return.
        gen_negs = ((i, priority)
                    for priority in sorted(self.negitems)
                    for i in reversed(self.negitems[priority]))
        gen_zeros = ((item, 0) for item in reversed(self.pzero))
        gen_pos = ((i, priority)
                   for priority in sorted(self.positems)
                   for i in reversed(self.positems[priority]))
        return chain(gen_negs, gen_zeros, gen_pos)

    def __nonzero__(self):
        """Return True when any of the three containers is non-empty."""
        return bool(self.negitems or self.pzero or self.positems)

    # Python 3 looks up ``__bool__``; ``__nonzero__`` is the Python 2 name.
    __bool__ = __nonzero__
#------------------------------------------------------------------------------
class PriorityQueue5(object):
    """list+deque"""
    # NOTE(review): the docstring looks like the label printed in the
    # profiling output -- keep it verbatim.
    #
    # A fixed list of deques, one per priority. Priority ``p`` lives at
    # list index ``p + self.zero``, so valid priorities range from
    # ``-self.zero`` to ``+self.zero`` inclusive.

    def __init__(self, size=1):
        """Preallocate deques for a fixed number of priorities.

        An even ``size`` is rounded up to the next odd number so that
        priority 0 sits exactly in the middle of the list.
        """
        size = size if size % 2 else size + 1
        self.zero = size // 2
        self.priolist = [deque() for _ in range(size)]

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first).

        Raises IndexError when ``priority`` is outside the preallocated
        range.
        """
        slot = priority + self.zero
        # A negative index would silently wrap to the far end of the list
        # and file the item under the wrong priority, so check both bounds.
        if not 0 <= slot < len(self.priolist):
            raise IndexError("priority out of range")
        self.priolist[slot].appendleft(item)

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest priority.

        Raises IndexError when no item is queued.
        """
        for slot, bucket in enumerate(self.priolist):
            if bucket:
                return (bucket.pop(), slot - self.zero)
        raise IndexError("pop from an empty queue")

    def __len__(self):
        """Return the total number of queued items."""
        return sum(len(bucket) for bucket in self.priolist)

    def __iter__(self):
        """Yield ``(item, priority)`` pairs in the order ``pop`` uses."""
        for slot, bucket in enumerate(self.priolist):
            priority = slot - self.zero
            # The right end of each deque holds its oldest item.
            for item in reversed(bucket):
                yield (item, priority)

    def __nonzero__(self):
        """Return True when at least one deque holds an item."""
        return any(self.priolist)

    # Python 3 looks up ``__bool__``; ``__nonzero__`` is the Python 2 name.
    __bool__ = __nonzero__
class PriorityQueue5b(PriorityQueue5):
    """list+deque+cache"""
    # NOTE(review): the docstring looks like the label printed in the
    # profiling output -- keep it verbatim.
    #
    # Same storage as the parent class, plus a cached ``index`` that is
    # never greater than the position of the lowest non-empty deque, so
    # ``pop`` can start scanning there instead of at the front of the list.
    #
    # Profiling figures recorded in the commit note for this class
    # (2009-03-19; pushpops = 50000, times = 30; units not stated):
    #   priorities        1      3      5      10     100
    #   list+deque        4.85   5.49   5.87   7.00   25.92
    #   list+deque+cache  4.86   4.77   4.74   4.82   4.78

    # Class-level starting value; the first assignment in ``push``/``pop``
    # rebinds it as an instance attribute.
    index = 0

    def push(self, item, priority=0):
        """Add ``item`` with the given ``priority`` (lower values pop first).

        Raises IndexError when ``priority`` is outside the preallocated
        range.
        """
        i = priority + self.zero
        # A negative index would wrap to the far end of the list and also
        # drag the cached ``index`` below zero, so check both bounds.
        if not 0 <= i < len(self.priolist):
            raise IndexError("priority out of range")
        self.priolist[i].appendleft(item)
        # The new item may sit below the currently cached position.
        self.index = min(self.index, i)

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest priority.

        Raises IndexError when no item is queued.
        """
        cached = self.priolist[self.index]
        if cached:
            return (cached.pop(), self.index - self.zero)
        # The cached deque is empty: scan upward from it. ``offset`` is
        # relative to the slice start, hence ``+=`` on the cached index.
        for offset, queue in enumerate(self.priolist[self.index:]):
            if queue:
                self.index += offset
                return (queue.pop(), self.index - self.zero)
        raise IndexError("pop from an empty queue")
from itertools import islice
class PriorityQueue5c(PriorityQueue5b):
    """list+deque+cache+islice"""
    # Same cached-index scheme as the parent class, but the upward scan
    # uses ``islice`` rather than a list slice.

    def pop(self):
        """Remove and return ``(item, priority)`` for the lowest priority.

        Raises IndexError when no item is queued.
        """
        start = self.index
        head = self.priolist[start]
        if head:
            return (head.pop(), start - self.zero)
        # ``enumerate`` is seeded with ``start`` so ``slot`` is already an
        # absolute position in ``priolist``.
        remaining = islice(self.priolist, start, None)
        for slot, bucket in enumerate(remaining, start):
            if not bucket:
                continue
            self.index = slot
            return (bucket.pop(), slot - self.zero)
        raise IndexError("pop from an empty queue")
#------------------------------------------------------------------------------
# Export every module-level name that starts with 'PriorityQueue'.
# The keys are snapshotted first so that names bound while the list is
# being built cannot disturb the iteration.
__all__ = [ident for ident in list(globals().keys())
           if ident.startswith('PriorityQueue')]