mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 22:43:57 +00:00
Allow setting a minimum download delay for the AutoThrottle extension. Also
clamp the download delay to a minimum of spider.download_delay, if given.
This commit is contained in:
parent
fc52d8d5cf
commit
59cf9d9b1a
@ -69,12 +69,15 @@ class AutoThrottle(object):
|
||||
self.CONCURRENCY_CHECK_PERIOD = settings.getint("AUTOTHROTTLE_CONCURRENCY_CHECK_PERIOD", 10)
|
||||
self.MAX_CONCURRENCY = settings.getint("AUTOTHROTTLE_MAX_CONCURRENCY", 8)
|
||||
self.DEBUG = settings.getint("AUTOTHROTTLE_DEBUG", False)
|
||||
self.MIN_DOWNLOAD_DELAY = settings.getint("AUTOTHROTTLE_MIN_DOWNLOAD_DELAY")
|
||||
|
||||
@classmethod
|
||||
def from_crawler(cls, crawler):
|
||||
return cls(crawler)
|
||||
|
||||
def spider_opened(self, spider):
|
||||
if hasattr(spider, "download_delay"):
|
||||
self.MIN_DOWNLOAD_DELAY = spider.download_delay
|
||||
spider.download_delay = self.START_DELAY
|
||||
if hasattr(spider, "max_concurrent_requests"):
|
||||
self.MAX_CONCURRENCY = spider.max_concurrent_requests
|
||||
@ -82,7 +85,7 @@ class AutoThrottle(object):
|
||||
spider.max_concurrent_requests = 1
|
||||
self.last_latencies = [self.START_DELAY]
|
||||
self.last_lat = self.START_DELAY, 0.0
|
||||
|
||||
|
||||
def response_received(self, response, spider):
|
||||
slot = self._get_slot(response.request)
|
||||
latency = response.meta.get('download_latency')
|
||||
@ -124,6 +127,9 @@ class AutoThrottle(object):
|
||||
# if latency is bigger than old delay, then use latency instead of mean. Works better with problematic sites
|
||||
new_delay = (slot.delay + latency) / 2.0 if latency < slot.delay else latency
|
||||
|
||||
if new_delay < self.MIN_DOWNLOAD_DELAY:
|
||||
new_delay = self.MIN_DOWNLOAD_DELAY
|
||||
|
||||
# don't adjust delay if response status != 200 and new delay is smaller than old one,
|
||||
# as error pages (and redirections) are usually small and so tend to reduce latency, thus provoking a positive feedback
|
||||
# by reducing delay instead of increasing it.
|
||||
|
Loading…
x
Reference in New Issue
Block a user