1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 04:04:21 +00:00

Applied a patch to ClientForm to fix a bug with wrong entities. Also added tests, and left the patch in the repo in case we upgrade ClientForm in the future and need to re-apply it.

This commit is contained in:
Pablo Hoffman 2010-07-13 19:46:53 -03:00
parent 9e37ec4230
commit cc32f6ec66
3 changed files with 29 additions and 1 deletions

View File

@ -0,0 +1,8 @@
import unittest
from scrapy.xlib import ClientForm
class ClientFormPatchTests(unittest.TestCase):
    """Tests for the patched ClientForm module shipped in scrapy.xlib."""

    def test_patched_unescape_charref(self):
        """unescape_charref('c', 'utf-8') must give back 'c'."""
        # 'c' is not a valid decimal character reference; the call is
        # expected to return the name as-is.
        result = ClientForm.unescape_charref('c', 'utf-8')
        self.assertEqual(result, 'c')

View File

@ -0,0 +1,15 @@
diff --git a/scrapy/xlib/ClientForm.py b/scrapy/xlib/ClientForm.py
--- a/scrapy/xlib/ClientForm.py
+++ b/scrapy/xlib/ClientForm.py
@@ -242,5 +242,10 @@ def unescape_charref(data, encoding):
if name.startswith("x"):
name, base= name[1:], 16
- uc = unichr(int(name, base))
+ try:
+ uc = unichr(int(name, base))
+ except ValueError:
+ # invalid literal for int()
+ # or integer not in unichr()'s range
+ uc = name
if encoding is None:
return uc

View File

@ -241,7 +241,12 @@ def unescape_charref(data, encoding):
name, base = data, 10
if name.startswith("x"):
name, base= name[1:], 16
uc = unichr(int(name, base))
try:
uc = unichr(int(name, base))
except ValueError:
# invalid literal for int()
# or integer not in unichr()'s range
uc = name
if encoding is None:
return uc
else: