diff --git a/scrapy/trunk/scrapy/tests/test_utils_url.py b/scrapy/trunk/scrapy/tests/test_utils_url.py
index 2b0fcb660..e68136a6a 100644
--- a/scrapy/trunk/scrapy/tests/test_utils_url.py
+++ b/scrapy/trunk/scrapy/tests/test_utils_url.py
@@ -82,9 +82,12 @@ class UrlUtilsTest(unittest.TestCase):
                          'product.html?id=200&foo=bar')
 
     def test_canonicalize_url(self):
-        # no query arguments
+        # simplest case
         self.assertEqual(canonicalize_url("http://www.example.com"),
-                                          "http://www.example.com")
+                                           "http://www.example.com")
+
+        # always return a str
+        assert isinstance(canonicalize_url(u"http://www.example.com"), str)
 
         # typical usage
         self.assertEqual(canonicalize_url("http://www.example.com/do?a=1&b=2&c=3"),
@@ -96,7 +99,7 @@ class UrlUtilsTest(unittest.TestCase):
 
         # sorting by argument values
         self.assertEqual(canonicalize_url("http://www.example.com/do?c=3&b=5&b=2&a=50"),
-                                          "http://www.example.com/do?a=50&b=2&b=5&c=3")
+                                           "http://www.example.com/do?a=50&b=2&b=5&c=3")
 
         # using keep_blank_values
         self.assertEqual(canonicalize_url("http://www.example.com/do?b=&a=2"),
@@ -148,6 +151,14 @@ class UrlUtilsTest(unittest.TestCase):
         self.assertEqual(canonicalize_url(u"http://user:pass@www.example.com/do?a=1#frag", keep_fragments=True),
                                           u"http://user:pass@www.example.com/do?a=1#frag")
 
+        # urllib.quote uses a mapping cache of encoded characters. when parsing
+        # an already percent-encoded url, it will fail if that url was not
+        # percent-encoded as utf-8; that's why canonicalize_url must always
+        # convert the urls to string. the following test asserts that
+        # functionality.
+        self.assertEqual(canonicalize_url(u'http://www.example.com/caf%E9-con-leche.htm'),
+                                          'http://www.example.com/caf%E9-con-leche.htm')
+
     def test_check_valid_urlencode(self):
         self.assertFalse(check_valid_urlencode(r'http://www.example.com/pictures\detail\CAN43664.jpg'))
         self.assertTrue(check_valid_urlencode('http://www.example.com/pictures%5Cdetail%5CCAN43664.jpg'))
diff --git a/scrapy/trunk/scrapy/utils/url.py b/scrapy/trunk/scrapy/utils/url.py
index 5f07283f7..a2b586d5c 100644
--- a/scrapy/trunk/scrapy/utils/url.py
+++ b/scrapy/trunk/scrapy/utils/url.py
@@ -125,7 +125,7 @@ def add_or_replace_parameter(url, name, new_value, sep='&'):
     return next_url
 
 def canonicalize_url(url, keep_blank_values=False, keep_fragments=False):
-    """Canonicalize url by applying the following procedures:
+    """Canonicalize the given url by applying the following procedures:
 
     - sort query arguments, first by key, then by value
     - percent encode paths and query arguments. non-ASCII characters are
@@ -135,9 +135,13 @@ def canonicalize_url(url, keep_blank_values=False, keep_fragments=False):
     - remove query arguments with blank values (unless keep_blank_values is True)
     - remove fragments (unless keep_fragments is True)
 
+    The url passed can be a str or unicode, while the url returned is always a
+    str.
+
     For examples see the tests in scrapy.tests.test_utils_url
     """
+    url = url.encode('utf-8')
    parts = list(urlparse.urlparse(url))
    keyvals = cgi.parse_qsl(parts[4], keep_blank_values)
    keyvals.sort()