1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 19:43:40 +00:00

Added excluded characters; also implemented Dan's readability tips.

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40436
This commit is contained in:
samus_ 2008-11-26 18:31:53 +00:00
parent 874331a5b7
commit 8d08ab5f98
2 changed files with 8 additions and 5 deletions

View File

@ -158,8 +158,9 @@ class UrlUtilsTest(unittest.TestCase):
self.assertFalse(check_valid_urlencode('http://www.example.com/?q=foo bar&q2=foo2 bar2')) self.assertFalse(check_valid_urlencode('http://www.example.com/?q=foo bar&q2=foo2 bar2'))
self.assertTrue(check_valid_urlencode('http://www.example.com/?q=foo+bar&q2=foo2%20bar2')) self.assertTrue(check_valid_urlencode('http://www.example.com/?q=foo+bar&q2=foo2%20bar2'))
self.assertFalse(check_valid_urlencode('http://www.example.com/product,little:london$set%')) self.assertFalse(check_valid_urlencode('http://www.example.com/.,:;!@$%^*()_-[]{}|'))
self.assertTrue(check_valid_urlencode('http://www.example.com/product%2Clittle%3Alondon%24set%25')) self.assertTrue(check_valid_urlencode('http://www.example.com/.,:;!@%24%25%5E*()_-%5B%5D%7B%7D%7C'))
self.assertTrue(check_valid_urlencode('http://www.example.com/.%2C%3A%3B%21%40%24%25%5E%2A%28%29_-%5B%5D%7B%7D%7C'))
# Allow running this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()

View File

@ -150,9 +150,11 @@ def canonicalize_url(url, keep_blank_values=False, keep_fragments=False):
def check_valid_urlencode(url):
    """Check that the url path and query arguments are properly quoted.

    A component counts as properly quoted when it survives an
    unquote/quote round-trip unchanged, i.e. every character that must
    be percent-escaped already is.  Characters in the tolerated set
    below are stripped before the check, since they are commonly left
    unescaped in real-world URLs.

    NOTE: uses the Python 2 `urllib` / `urlparse` modules, matching the
    rest of this file.
    """
    def check_str(s):
        # Characters we deliberately tolerate unescaped.
        for ignore_char in ',:;!@*()':
            s = s.replace(ignore_char, '')
        # '+' is a legal space encoding in query strings, so each
        # '+'-separated piece is round-tripped independently.
        return all(s_plus == urllib.quote(urllib.unquote(s_plus))
                   for s_plus in s.split('+'))

    def check_param(p):
        # Split only on the first '=' so values containing '=' stay intact.
        return all(check_str(s) for s in p.split('=', 1))

    split_result = urlparse.urlsplit(url)
    return (check_str(split_result.path)
            and all(check_param(p) for p in split_result.query.split('&')))