
Merge pull request #745 from crlane/cl/offsite-middleware-bugfix

Cl/offsite middleware bugfix
Daniel Graña 2014-06-18 03:01:21 -03:00
commit 7449b25bed
2 changed files with 14 additions and 1 deletion

scrapy/contrib/spidermiddleware/offsite.py

@@ -49,7 +49,7 @@ class OffsiteMiddleware(object):
         allowed_domains = getattr(spider, 'allowed_domains', None)
         if not allowed_domains:
             return re.compile('') # allow all by default
-        regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains)
+        regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains if d is not None)
         return re.compile(regex)
 
     def spider_opened(self, spider):
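
The changed line is the whole fix: get_host_regex() joins the escaped allowed_domains entries into one alternation, and a None entry makes re.escape() raise a TypeError, so the middleware cannot even build its host regex. A minimal standalone sketch of the old and new expressions, outside Scrapy (the sample allowed_domains value is made up for illustration):

    import re

    allowed_domains = ['scrapytest.org', None]  # a None entry, e.g. from a URL with no hostname

    # Old expression: re.escape(None) raises TypeError, so no regex is ever built.
    try:
        r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains)
    except TypeError as exc:
        print('old expression fails: %s' % exc)

    # Patched expression: None entries are skipped and a usable pattern comes out.
    regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains if d is not None)
    print(re.compile(regex).match('www.scrapytest.org') is not None)  # True

Filtering inside the join leaves behaviour for well-formed allowed_domains lists unchanged; only the None entries are dropped.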

tests/test_spidermiddleware_offsite.py

@@ -5,6 +5,7 @@ from scrapy.spider import Spider
 from scrapy.contrib.spidermiddleware.offsite import OffsiteMiddleware
 from scrapy.utils.test import get_crawler
+from urlparse import urlparse
 
 
 class TestOffsiteMiddleware(TestCase):
 
@@ -52,3 +53,15 @@ class TestOffsiteMiddleware3(TestOffsiteMiddleware2):
 
     def _get_spider(self):
         return Spider('foo')
+
+
+class TestOffsiteMiddleware4(TestOffsiteMiddleware3):
+    def _get_spider(self):
+        bad_hostname = urlparse('http:////scrapytest.org').hostname
+        return Spider('foo', allowed_domains=['scrapytest.org', None, bad_hostname])
+
+    def test_process_spider_output(self):
+        res = Response('http://scrapytest.org')
+        reqs = [Request('http://scrapytest.org/1')]
+        out = list(self.mw.process_spider_output(res, reqs, self.spider))
+        self.assertEquals(out, reqs)
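
The new TestOffsiteMiddleware4 gets a None into allowed_domains the same way it can happen in practice: urlparse() finds no hostname when the netloc is empty. A quick check of that assumption with the Python 2 stdlib import added above:

    from urlparse import urlparse  # Python 2; urllib.parse.urlparse on Python 3

    # Four slashes leave the netloc empty, so no hostname can be extracted.
    print(urlparse('http:////scrapytest.org').hostname)  # None

The spider is therefore created with allowed_domains=['scrapytest.org', None, None], and test_process_spider_output asserts that the middleware still lets the on-site request through instead of raising.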