1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 13:23:59 +00:00

Issue a warning when the user puts a URL into allowed_domains (#2250)

This commit is contained in:
Jana Cavojska 2017-11-18 20:03:59 +01:00
parent b8870ee8a1
commit 62a6261028

View File

@@ -52,6 +52,10 @@ class OffsiteMiddleware(object):
allowed_domains = getattr(spider, 'allowed_domains', None)
if not allowed_domains:
return re.compile('') # allow all by default
for domainIndex in range(0, len(allowed_domains)):
url_pattern = re.compile("^https?://.*$")
if url_pattern.match(allowed_domains[domainIndex]):
logger.warn("allowed_domains accepts only domains, not URLs. Ignoring URL entry %s in allowed_domains." % allowed_domains[domainIndex])
regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains if d is not None)
return re.compile(regex)