1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 19:43:40 +00:00

Added excluded characters; also implemented Dan's readability tips.

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40436
This commit is contained in:
samus_ 2008-11-26 18:31:53 +00:00
parent 874331a5b7
commit 8d08ab5f98
2 changed files with 8 additions and 5 deletions

View File

@ -158,8 +158,9 @@ class UrlUtilsTest(unittest.TestCase):
self.assertFalse(check_valid_urlencode('http://www.example.com/?q=foo bar&q2=foo2 bar2')) self.assertFalse(check_valid_urlencode('http://www.example.com/?q=foo bar&q2=foo2 bar2'))
self.assertTrue(check_valid_urlencode('http://www.example.com/?q=foo+bar&q2=foo2%20bar2')) self.assertTrue(check_valid_urlencode('http://www.example.com/?q=foo+bar&q2=foo2%20bar2'))
self.assertFalse(check_valid_urlencode('http://www.example.com/product,little:london$set%')) self.assertFalse(check_valid_urlencode('http://www.example.com/.,:;!@$%^*()_-[]{}|'))
self.assertTrue(check_valid_urlencode('http://www.example.com/product%2Clittle%3Alondon%24set%25')) self.assertTrue(check_valid_urlencode('http://www.example.com/.,:;!@%24%25%5E*()_-%5B%5D%7B%7D%7C'))
self.assertTrue(check_valid_urlencode('http://www.example.com/.%2C%3A%3B%21%40%24%25%5E%2A%28%29_-%5B%5D%7B%7D%7C'))
# Allow running this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()

View File

@ -150,9 +150,11 @@ def canonicalize_url(url, keep_blank_values=False, keep_fragments=False):
def check_valid_urlencode(url):
    """Check that the url path and query arguments are properly quoted.

    A component counts as properly quoted when it survives an
    unquote/quote round-trip unchanged, i.e. every character that must
    be percent-escaped already is.  Characters in the tolerated set
    below are stripped before the check, since they are commonly left
    unescaped in real-world URLs.

    NOTE: uses the Python 2 `urllib` / `urlparse` modules, matching the
    rest of this file.
    """
    def check_str(s):
        # Characters we deliberately tolerate unescaped.
        for ignore_char in ',:;!@*()':
            s = s.replace(ignore_char, '')
        # '+' is a legal space encoding in query strings, so each
        # '+'-separated piece is round-tripped independently.
        return all(s_plus == urllib.quote(urllib.unquote(s_plus))
                   for s_plus in s.split('+'))

    def check_param(p):
        # Split only on the first '=' so values containing '=' stay intact.
        return all(check_str(s) for s in p.split('=', 1))

    split_result = urlparse.urlsplit(url)
    return (check_str(split_result.path)
            and all(check_param(p) for p in split_result.query.split('&')))