diff --git a/scrapy/contrib/spidermiddleware/offsite.py b/scrapy/contrib/spidermiddleware/offsite.py index f8acbf8d8..95702d777 100644 --- a/scrapy/contrib/spidermiddleware/offsite.py +++ b/scrapy/contrib/spidermiddleware/offsite.py @@ -13,9 +13,12 @@ from scrapy import log class OffsiteMiddleware(object): + def __init__(self, stats): + self.stats = stats + @classmethod def from_crawler(cls, crawler): - o = cls() + o = cls(crawler.stats) crawler.signals.connect(o.spider_opened, signal=signals.spider_opened) return o @@ -30,6 +33,8 @@ class OffsiteMiddleware(object): self.domains_seen.add(domain) log.msg(format="Filtered offsite request to %(domain)r: %(request)s", level=log.DEBUG, spider=spider, domain=domain, request=x) + self.stats.inc_value('offsite/domains', spider=spider) + self.stats.inc_value('offsite/filtered', spider=spider) else: yield x diff --git a/scrapy/tests/test_spidermiddleware_offsite.py b/scrapy/tests/test_spidermiddleware_offsite.py index fd6e43753..8477e4dee 100644 --- a/scrapy/tests/test_spidermiddleware_offsite.py +++ b/scrapy/tests/test_spidermiddleware_offsite.py @@ -3,13 +3,15 @@ from unittest import TestCase from scrapy.http import Response, Request from scrapy.spider import Spider from scrapy.contrib.spidermiddleware.offsite import OffsiteMiddleware +from scrapy.utils.test import get_crawler class TestOffsiteMiddleware(TestCase): def setUp(self): self.spider = self._get_spider() - self.mw = OffsiteMiddleware() + crawler = get_crawler() + self.mw = OffsiteMiddleware.from_crawler(crawler) self.mw.spider_opened(self.spider) def _get_spider(self):