mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 16:03:49 +00:00
Merge pull request #128 from dangra/cannonicalize-missing-url-path
Handle missing paths in URLs as "/" (e.g. "http://www.example.com" is canonicalized to "http://www.example.com/")
This commit is contained in:
commit
43028876b5
@ -60,12 +60,15 @@ class UrlUtilsTest(unittest.TestCase):
|
|||||||
|
|
||||||
def test_canonicalize_url(self):
|
def test_canonicalize_url(self):
|
||||||
# simplest case
|
# simplest case
|
||||||
self.assertEqual(canonicalize_url("http://www.example.com"),
|
self.assertEqual(canonicalize_url("http://www.example.com/"),
|
||||||
"http://www.example.com")
|
"http://www.example.com/")
|
||||||
|
|
||||||
# always return a str
|
# always return a str
|
||||||
assert isinstance(canonicalize_url(u"http://www.example.com"), str)
|
assert isinstance(canonicalize_url(u"http://www.example.com"), str)
|
||||||
|
|
||||||
|
# append missing path
|
||||||
|
self.assertEqual(canonicalize_url("http://www.example.com"),
|
||||||
|
"http://www.example.com/")
|
||||||
# typical usage
|
# typical usage
|
||||||
self.assertEqual(canonicalize_url("http://www.example.com/do?a=1&b=2&c=3"),
|
self.assertEqual(canonicalize_url("http://www.example.com/do?a=1&b=2&c=3"),
|
||||||
"http://www.example.com/do?a=1&b=2&c=3")
|
"http://www.example.com/do?a=1&b=2&c=3")
|
||||||
@ -145,8 +148,8 @@ class UrlUtilsTest(unittest.TestCase):
|
|||||||
'http://www.example.com/caf%E9-con-leche.htm')
|
'http://www.example.com/caf%E9-con-leche.htm')
|
||||||
|
|
||||||
# domains are case insensitive
|
# domains are case insensitive
|
||||||
self.assertEqual(canonicalize_url("http://www.EXAMPLE.com"),
|
self.assertEqual(canonicalize_url("http://www.EXAMPLE.com/"),
|
||||||
"http://www.example.com")
|
"http://www.example.com/")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -52,7 +52,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False, \
|
|||||||
keyvals = cgi.parse_qsl(query, keep_blank_values)
|
keyvals = cgi.parse_qsl(query, keep_blank_values)
|
||||||
keyvals.sort()
|
keyvals.sort()
|
||||||
query = urllib.urlencode(keyvals)
|
query = urllib.urlencode(keyvals)
|
||||||
path = safe_url_string(urllib.unquote(path))
|
path = safe_url_string(urllib.unquote(path)) or '/'
|
||||||
fragment = '' if not keep_fragments else fragment
|
fragment = '' if not keep_fragments else fragment
|
||||||
return urlparse.urlunparse((scheme, netloc.lower(), path, params, query, fragment))
|
return urlparse.urlunparse((scheme, netloc.lower(), path, params, query, fragment))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user