
Allow setting a minimal download delay for the AutoThrottle extension; also
floor the download delay at spider.download_delay, if given.
Martin Olveyra 2012-01-16 18:16:24 -02:00
parent fc52d8d5cf
commit 59cf9d9b1a


@@ -69,12 +69,15 @@ class AutoThrottle(object):
        self.CONCURRENCY_CHECK_PERIOD = settings.getint("AUTOTHROTTLE_CONCURRENCY_CHECK_PERIOD", 10)
        self.MAX_CONCURRENCY = settings.getint("AUTOTHROTTLE_MAX_CONCURRENCY", 8)
        self.DEBUG = settings.getint("AUTOTHROTTLE_DEBUG", False)
        self.MIN_DOWNLOAD_DELAY = settings.getint("AUTOTHROTTLE_MIN_DOWNLOAD_DELAY")

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def spider_opened(self, spider):
        if hasattr(spider, "download_delay"):
            self.MIN_DOWNLOAD_DELAY = spider.download_delay
        spider.download_delay = self.START_DELAY
        if hasattr(spider, "max_concurrent_requests"):
            self.MAX_CONCURRENCY = spider.max_concurrent_requests
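
For context, a minimal sketch of how the new knob might be used. AUTOTHROTTLE_MIN_DOWNLOAD_DELAY comes from the hunk above; AUTOTHROTTLE_ENABLED, the file layout, and the concrete values are assumptions:

# settings.py -- hypothetical configuration exercising this patch
AUTOTHROTTLE_ENABLED = True          # assumed enable flag, not part of this diff
AUTOTHROTTLE_MIN_DOWNLOAD_DELAY = 2  # read via settings.getint() above; 0 if unset

# myspider.py -- per spider_opened() above, a spider-level download_delay
# overrides the setting and becomes the effective minimum delay
from scrapy.spider import BaseSpider

class MySpider(BaseSpider):
    name = "myspider"
    download_delay = 5  # AutoThrottle will never throttle below this delay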
@@ -82,7 +85,7 @@ class AutoThrottle(object):
            spider.max_concurrent_requests = 1
        self.last_latencies = [self.START_DELAY]
        self.last_lat = self.START_DELAY, 0.0

    def response_received(self, response, spider):
        slot = self._get_slot(response.request)
        latency = response.meta.get('download_latency')
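
As a side note, 'download_latency' is the standard Scrapy meta key recording how long the downloader took to fetch the response; a tiny illustration with made-up values:

# Illustration only: the downloader stamps each request's meta with the fetch
# time, and the extension reads it back here (values below are made up).
meta = {'download_latency': 0.37}        # seconds spent downloading the page
latency = meta.get('download_latency')   # None if the key is missing
if latency is not None:
    print("observed latency: %.2fs" % latency)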
@@ -124,6 +127,9 @@ class AutoThrottle(object):
        # If the latency is bigger than the old delay, use the latency instead
        # of the mean; this works better with problematic sites.
        new_delay = (slot.delay + latency) / 2.0 if latency < slot.delay else latency
        if new_delay < self.MIN_DOWNLOAD_DELAY:
            new_delay = self.MIN_DOWNLOAD_DELAY
        # Don't adjust the delay if the response status != 200 and the new delay
        # is smaller than the old one: error pages (and redirections) are usually
        # small and so tend to reduce latency, provoking a positive feedback loop
        # that keeps reducing the delay instead of increasing it.
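
A standalone sketch of the adjustment rule above, with made-up numbers showing both branches and the new floor:

# Sketch of the delay-update rule (not the extension's actual code).
MIN_DOWNLOAD_DELAY = 2.0

def adjust(delay, latency):
    # Average toward the latency when the site got faster; jump straight to
    # the latency when it got slower; then clamp to the configured minimum.
    new_delay = (delay + latency) / 2.0 if latency < delay else latency
    return max(new_delay, MIN_DOWNLOAD_DELAY)

print(adjust(4.0, 1.0))  # fast response: (4.0 + 1.0) / 2 = 2.5
print(adjust(4.0, 6.0))  # slow response: jumps straight to 6.0
print(adjust(2.5, 0.5))  # would be 1.5, clamped up to the 2.0 minimum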