mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 23:23:47 +00:00
Do not filter requests with dont_filter attribute set in OffsiteMiddleware
This commit is contained in:
parent
bff3d31469
commit
5f1b1c05f8
@ -211,7 +211,7 @@ OffsiteMiddleware
|
|||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
.. module:: scrapy.contrib.spidermiddleware.offsite
|
.. module:: scrapy.contrib.spidermiddleware.offsite
|
||||||
:synopsis: Offiste Spider Middleware
|
:synopsis: Offsite Spider Middleware
|
||||||
|
|
||||||
.. class:: OffsiteMiddleware
|
.. class:: OffsiteMiddleware
|
||||||
|
|
||||||
@ -236,6 +236,10 @@ OffsiteMiddleware
|
|||||||
:attr:`~scrapy.spider.BaseSpider.allowed_domains` attribute, or the
|
:attr:`~scrapy.spider.BaseSpider.allowed_domains` attribute, or the
|
||||||
attribute is empty, the offsite middleware will allow all requests.
|
attribute is empty, the offsite middleware will allow all requests.
|
||||||
|
|
||||||
|
If the request has the :attr:`~scrapy.http.Request.dont_filter` attribute
|
||||||
|
set, the offsite middleware will allow the request even if its domain is not
|
||||||
|
listed in the spider's allowed domains.
|
||||||
|
|
||||||
|
|
||||||
RefererMiddleware
|
RefererMiddleware
|
||||||
-----------------
|
-----------------
|
||||||
|
@ -23,7 +23,7 @@ class OffsiteMiddleware(object):
|
|||||||
def process_spider_output(self, response, result, spider):
|
def process_spider_output(self, response, result, spider):
|
||||||
for x in result:
|
for x in result:
|
||||||
if isinstance(x, Request):
|
if isinstance(x, Request):
|
||||||
if self.should_follow(x, spider):
|
if x.dont_filter or self.should_follow(x, spider):
|
||||||
yield x
|
yield x
|
||||||
else:
|
else:
|
||||||
domain = urlparse_cached(x).hostname
|
domain = urlparse_cached(x).hostname
|
||||||
|
@ -20,8 +20,10 @@ class TestOffsiteMiddleware(TestCase):
|
|||||||
|
|
||||||
onsite_reqs = [Request('http://scrapytest.org/1'),
|
onsite_reqs = [Request('http://scrapytest.org/1'),
|
||||||
Request('http://scrapy.org/1'),
|
Request('http://scrapy.org/1'),
|
||||||
Request('http://sub.scrapy.org/1')]
|
Request('http://sub.scrapy.org/1'),
|
||||||
offsite_reqs = [Request('http://scrapy2.org')]
|
Request('http://offsite.tld/letmepass', dont_filter=True)]
|
||||||
|
offsite_reqs = [Request('http://scrapy2.org'),
|
||||||
|
Request('http://offsite.tld/')]
|
||||||
reqs = onsite_reqs + offsite_reqs
|
reqs = onsite_reqs + offsite_reqs
|
||||||
|
|
||||||
out = list(self.mw.process_spider_output(res, reqs, self.spider))
|
out = list(self.mw.process_spider_output(res, reqs, self.spider))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user