mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 19:43:40 +00:00
Added excluded characters (,:;!@*()) to check_valid_urlencode; also implemented Dan's readability tips
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40436
This commit is contained in:
parent
874331a5b7
commit
8d08ab5f98
@ -158,8 +158,9 @@ class UrlUtilsTest(unittest.TestCase):
|
|||||||
self.assertFalse(check_valid_urlencode('http://www.example.com/?q=foo bar&q2=foo2 bar2'))
|
self.assertFalse(check_valid_urlencode('http://www.example.com/?q=foo bar&q2=foo2 bar2'))
|
||||||
self.assertTrue(check_valid_urlencode('http://www.example.com/?q=foo+bar&q2=foo2%20bar2'))
|
self.assertTrue(check_valid_urlencode('http://www.example.com/?q=foo+bar&q2=foo2%20bar2'))
|
||||||
|
|
||||||
self.assertFalse(check_valid_urlencode('http://www.example.com/product,little:london$set%'))
|
self.assertFalse(check_valid_urlencode('http://www.example.com/.,:;!@$%^*()_-[]{}|'))
|
||||||
self.assertTrue(check_valid_urlencode('http://www.example.com/product%2Clittle%3Alondon%24set%25'))
|
self.assertTrue(check_valid_urlencode('http://www.example.com/.,:;!@%24%25%5E*()_-%5B%5D%7B%7D%7C'))
|
||||||
|
self.assertTrue(check_valid_urlencode('http://www.example.com/.%2C%3A%3B%21%40%24%25%5E%2A%28%29_-%5B%5D%7B%7D%7C'))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -150,9 +150,11 @@ def canonicalize_url(url, keep_blank_values=False, keep_fragments=False):
|
|||||||
def check_valid_urlencode(url):
|
def check_valid_urlencode(url):
|
||||||
""" check that the url-path and arguments are properly quoted """
|
""" check that the url-path and arguments are properly quoted """
|
||||||
def check_str(s):
|
def check_str(s):
|
||||||
return all((s_plus == urllib.quote(urllib.unquote(s_plus)) for s_plus in s.split('+')))
|
for ignore_char in ',:;!@*()':
|
||||||
|
s = s.replace(ignore_char, '')
|
||||||
|
return all(s_plus == urllib.quote(urllib.unquote(s_plus)) for s_plus in s.split('+'))
|
||||||
def check_param(p):
|
def check_param(p):
|
||||||
return all((check_str(s) for s in p.split('=', 1)))
|
return all(check_str(s) for s in p.split('=', 1))
|
||||||
|
|
||||||
split_result = urlparse.urlsplit(url)
|
split_result = urlparse.urlsplit(url)
|
||||||
return check_str(split_result[2]) and all((check_param(p) for p in split_result[3].split('&')))
|
return check_str(split_result.path) and all(check_param(p) for p in split_result.query.split('&'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user