mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 12:23:47 +00:00
Merge pull request #745 from crlane/cl/offsite-middleware-bugfix
Cl/offsite middleware bugfix
This commit is contained in:
commit
7449b25bed
@ -49,7 +49,7 @@ class OffsiteMiddleware(object):
|
||||
allowed_domains = getattr(spider, 'allowed_domains', None)
|
||||
if not allowed_domains:
|
||||
return re.compile('') # allow all by default
|
||||
regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains)
|
||||
regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains if d is not None)
|
||||
return re.compile(regex)
|
||||
|
||||
def spider_opened(self, spider):
|
||||
|
@ -5,6 +5,7 @@ from scrapy.spider import Spider
|
||||
from scrapy.contrib.spidermiddleware.offsite import OffsiteMiddleware
|
||||
from scrapy.utils.test import get_crawler
|
||||
|
||||
from urlparse import urlparse
|
||||
|
||||
class TestOffsiteMiddleware(TestCase):
|
||||
|
||||
@ -52,3 +53,15 @@ class TestOffsiteMiddleware3(TestOffsiteMiddleware2):
|
||||
def _get_spider(self):
|
||||
return Spider('foo')
|
||||
|
||||
|
||||
class TestOffsiteMiddleware4(TestOffsiteMiddleware3):
    """Offsite middleware test with unusable entries in ``allowed_domains``.

    The spider lists one valid domain next to ``None`` and the hostname
    parsed from a malformed URL. Requests to the valid domain must still
    pass through the middleware unfiltered.
    """

    def _get_spider(self):
        """Return a spider whose ``allowed_domains`` mixes valid and bad values."""
        # The extra slashes leave the URL without a network location, so
        # ``.hostname`` is expected to be None here (a second bad entry
        # obtained the way real spider code might produce one).
        bad_hostname = urlparse('http:////scrapytest.org').hostname
        return Spider('foo', allowed_domains=['scrapytest.org', None, bad_hostname])

    def test_process_spider_output(self):
        """An on-site request is returned unchanged despite the bad entries."""
        # NOTE(review): ``self.mw`` and ``self.spider`` are presumably set up
        # by a base test class not visible in this diff -- confirm.
        res = Response('http://scrapytest.org')
        reqs = [Request('http://scrapytest.org/1')]
        out = list(self.mw.process_spider_output(res, reqs, self.spider))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(out, reqs)
|
||||
|
Loading…
x
Reference in New Issue
Block a user