mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 11:03:56 +00:00
Merge pull request #128 from dangra/cannonicalize-missing-url-path
handle missing paths in urls as /
This commit is contained in:
commit
43028876b5
@@ -60,12 +60,15 @@ class UrlUtilsTest(unittest.TestCase):
|
||||
|
||||
def test_canonicalize_url(self):
|
||||
# simplest case
|
||||
self.assertEqual(canonicalize_url("http://www.example.com"),
|
||||
"http://www.example.com")
|
||||
self.assertEqual(canonicalize_url("http://www.example.com/"),
|
||||
"http://www.example.com/")
|
||||
|
||||
# always return a str
|
||||
assert isinstance(canonicalize_url(u"http://www.example.com"), str)
|
||||
|
||||
# append missing path
|
||||
self.assertEqual(canonicalize_url("http://www.example.com"),
|
||||
"http://www.example.com/")
|
||||
# typical usage
|
||||
self.assertEqual(canonicalize_url("http://www.example.com/do?a=1&b=2&c=3"),
|
||||
"http://www.example.com/do?a=1&b=2&c=3")
|
||||
@@ -145,8 +148,8 @@ class UrlUtilsTest(unittest.TestCase):
|
||||
'http://www.example.com/caf%E9-con-leche.htm')
|
||||
|
||||
# domains are case insensitive
|
||||
self.assertEqual(canonicalize_url("http://www.EXAMPLE.com"),
|
||||
"http://www.example.com")
|
||||
self.assertEqual(canonicalize_url("http://www.EXAMPLE.com/"),
|
||||
"http://www.example.com/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@@ -52,7 +52,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False, \
|
||||
keyvals = cgi.parse_qsl(query, keep_blank_values)
|
||||
keyvals.sort()
|
||||
query = urllib.urlencode(keyvals)
|
||||
- path = safe_url_string(urllib.unquote(path))
|
||||
+ path = safe_url_string(urllib.unquote(path)) or '/'
|
||||
fragment = '' if not keep_fragments else fragment
|
||||
return urlparse.urlunparse((scheme, netloc.lower(), path, params, query, fragment))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user