mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 20:23:44 +00:00
Do not filter requests with dont_filter attribute set in OffsiteMiddleware
This commit is contained in:
parent
bff3d31469
commit
5f1b1c05f8
@ -211,7 +211,7 @@ OffsiteMiddleware
|
||||
-----------------
|
||||
|
||||
.. module:: scrapy.contrib.spidermiddleware.offsite
|
||||
:synopsis: Offiste Spider Middleware
|
||||
:synopsis: Offsite Spider Middleware
|
||||
|
||||
.. class:: OffsiteMiddleware
|
||||
|
||||
@ -236,6 +236,10 @@ OffsiteMiddleware
|
||||
:attr:`~scrapy.spider.BaseSpider.allowed_domains` attribute, or the
|
||||
attribute is empty, the offsite middleware will allow all requests.
|
||||
|
||||
If the request has the :attr:`~scrapy.http.Request.dont_filter` attribute
|
||||
set, the offsite middleware will allow the request even if its domain is not
|
||||
listed in allowed domains.
|
||||
|
||||
|
||||
RefererMiddleware
|
||||
-----------------
|
||||
|
@ -23,7 +23,7 @@ class OffsiteMiddleware(object):
|
||||
def process_spider_output(self, response, result, spider):
|
||||
for x in result:
|
||||
if isinstance(x, Request):
|
||||
if self.should_follow(x, spider):
|
||||
if x.dont_filter or self.should_follow(x, spider):
|
||||
yield x
|
||||
else:
|
||||
domain = urlparse_cached(x).hostname
|
||||
|
@ -20,8 +20,10 @@ class TestOffsiteMiddleware(TestCase):
|
||||
|
||||
onsite_reqs = [Request('http://scrapytest.org/1'),
|
||||
Request('http://scrapy.org/1'),
|
||||
Request('http://sub.scrapy.org/1')]
|
||||
offsite_reqs = [Request('http://scrapy2.org')]
|
||||
Request('http://sub.scrapy.org/1'),
|
||||
Request('http://offsite.tld/letmepass', dont_filter=True)]
|
||||
offsite_reqs = [Request('http://scrapy2.org'),
|
||||
Request('http://offsite.tld/')]
|
||||
reqs = onsite_reqs + offsite_reqs
|
||||
|
||||
out = list(self.mw.process_spider_output(res, reqs, self.spider))
|
||||
|
Loading…
x
Reference in New Issue
Block a user