1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 05:24:33 +00:00

improved check

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40428
This commit is contained in:
samus_ 2008-11-26 12:05:31 +00:00
parent 537a3457cf
commit fe49bc2011
2 changed files with 16 additions and 6 deletions

View File

@ -149,12 +149,18 @@ class UrlUtilsTest(unittest.TestCase):
u"http://user:pass@www.example.com/do?a=1#frag")
def test_check_valid_urlencode(self):
    """Run check_valid_urlencode over badly quoted and properly quoted URLs."""
    # (url, expected verdict) pairs, evaluated in the order listed.
    cases = [
        (r'http://www.example.com/pictures detail CAN43664.jpg', False),
        ('http://www.example.com/pictures%20detail%20CAN43664.jpg', True),
        (r'http://www.example.com/pictures\detail\CAN43664.jpg', False),
        ('http://www.example.com/pictures%5Cdetail%5CCAN43664.jpg', True),
        ('http://www.example.com/pictures detail CAN43664.jpg', False),
        ('http://www.example.com/pictures+detail%20CAN43664.jpg', True),
        ('http://www.example.com/?q=foo bar&q2=foo2 bar2', False),
        ('http://www.example.com/?q=foo+bar&q2=foo2%20bar2', True),
        ('http://www.example.com/product,little:london$set%', False),
        ('http://www.example.com/product%2Clittle%3Alondon%24set%25', True),
    ]
    for url, expected in cases:
        verdict = check_valid_urlencode(url)
        if expected:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

View File

@ -148,7 +148,11 @@ def canonicalize_url(url, keep_blank_values=False, keep_fragments=False):
return urlparse.urlunparse(parts)
def check_valid_urlencode(url):
    """Check that the url path and query arguments are properly quoted.

    A component counts as properly quoted when unquoting and re-quoting it
    with ``urllib`` gives back exactly the same text.

    :param url: the URL string to check.
    :return: True when the path and every query argument name and value
        are properly quoted, False otherwise.
    """
    def check_str(s):
        # '+' is accepted as an encoded space, but urllib.quote would escape
        # it to '%2B'; compare each '+'-separated piece on its own instead.
        return all(piece == urllib.quote(urllib.unquote(piece))
                   for piece in s.split('+'))

    def check_param(p):
        # Split on the first '=' only, so the name and the value are each
        # validated and any later '=' stays part of the value.
        return all(check_str(part) for part in p.split('=', 1))

    _scheme, _netloc, path, query, _fragment = urlparse.urlsplit(url)
    # An empty query splits into a single empty argument, which is valid.
    return check_str(path) and all(check_param(p) for p in query.split('&'))