1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 21:04:12 +00:00

allow spider allowed_domains to be set/tuple, #259

This commit is contained in:
Steven Almeroth 2013-03-06 00:19:47 -06:00
parent 19d0942c74
commit b48ec1dce4

View File

@@ -5,7 +5,6 @@ library.
Some of the functions that used to be imported from this module have been moved Some of the functions that used to be imported from this module have been moved
to the w3lib.url module. Always import those from there instead. to the w3lib.url module. Always import those from there instead.
""" """
import urlparse import urlparse
import urllib import urllib
import cgi import cgi
@@ -13,6 +12,7 @@ import cgi
from w3lib.url import * from w3lib.url import *
from scrapy.utils.python import unicode_to_str from scrapy.utils.python import unicode_to_str
def url_is_from_any_domain(url, domains): def url_is_from_any_domain(url, domains):
"""Return True if the url belongs to any of the given domains""" """Return True if the url belongs to any of the given domains"""
host = parse_url(url).netloc host = parse_url(url).netloc
@@ -22,15 +22,18 @@ def url_is_from_any_domain(url, domains):
else: else:
return False return False
def url_is_from_spider(url, spider):
    """Return True if the url belongs to the given spider.

    A url matches when it is from the spider's name or from any entry of
    the spider's ``allowed_domains`` attribute (which may be a list, set
    or tuple; it defaults to empty when absent).
    """
    # Coerce allowed_domains to a list so set/tuple values concatenate cleanly.
    extra_domains = list(getattr(spider, 'allowed_domains', []))
    candidate_domains = [spider.name] + extra_domains
    return url_is_from_any_domain(url, candidate_domains)
def url_has_any_extension(url, extensions):
    """Return True if the url path's file extension is in *extensions*.

    The extension is extracted from the parsed url's path and lowercased
    before the membership test (e.g. ``'.PDF'`` matches ``'.pdf'``).
    """
    path = parse_url(url).path
    extension = posixpath.splitext(path)[1].lower()
    return extension in extensions
def canonicalize_url(url, keep_blank_values=True, keep_fragments=False, \
def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
encoding=None): encoding=None):
"""Canonicalize the given url by applying the following procedures: """Canonicalize the given url by applying the following procedures:
@@ -70,6 +73,7 @@ def parse_url(url, encoding=None):
return url if isinstance(url, urlparse.ParseResult) else \ return url if isinstance(url, urlparse.ParseResult) else \
urlparse.urlparse(unicode_to_str(url, encoding)) urlparse.urlparse(unicode_to_str(url, encoding))
def escape_ajax(url): def escape_ajax(url):
""" """
Return the crawleable url according to: Return the crawleable url according to: