mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 04:04:21 +00:00
Applied a patch to ClientForm to fix a bug when unescaping invalid character references (wrong entities). Also added tests, and left the patch file in the repo in case we upgrade ClientForm in the future and need to re-apply it.
This commit is contained in:
parent
9e37ec4230
commit
cc32f6ec66
8
scrapy/tests/test_clientform.py
Normal file
8
scrapy/tests/test_clientform.py
Normal file
@ -0,0 +1,8 @@
|
||||
import unittest
|
||||
|
||||
from scrapy.xlib import ClientForm
|
||||
|
||||
class ClientFormPatchTests(unittest.TestCase):
    """Regression tests for the patch applied to scrapy.xlib.ClientForm."""

    def test_patched_unescape_charref(self):
        # 'c' cannot be parsed as a numeric character reference; the
        # patched function is expected to return it instead of raising.
        invalid_ref = 'c'
        unescaped = ClientForm.unescape_charref(invalid_ref, 'utf-8')
        self.assertEqual(unescaped, 'c')
|
15
scrapy/xlib/ClientForm.patch
Normal file
15
scrapy/xlib/ClientForm.patch
Normal file
@ -0,0 +1,15 @@
|
||||
diff --git a/scrapy/xlib/ClientForm.py b/scrapy/xlib/ClientForm.py
|
||||
--- a/scrapy/xlib/ClientForm.py
|
||||
+++ b/scrapy/xlib/ClientForm.py
|
||||
@@ -242,5 +242,10 @@ def unescape_charref(data, encoding):
|
||||
if name.startswith("x"):
|
||||
name, base= name[1:], 16
|
||||
- uc = unichr(int(name, base))
|
||||
+ try:
|
||||
+ uc = unichr(int(name, base))
|
||||
+ except ValueError:
|
||||
+ # invalid literal for int()
|
||||
+ # or integer not in unichr()'s range
|
||||
+ uc = name
|
||||
if encoding is None:
|
||||
return uc
|
@ -241,7 +241,12 @@ def unescape_charref(data, encoding):
|
||||
name, base = data, 10
|
||||
if name.startswith("x"):
|
||||
name, base= name[1:], 16
|
||||
uc = unichr(int(name, base))
|
||||
try:
|
||||
uc = unichr(int(name, base))
|
||||
except ValueError:
|
||||
# invalid literal for int()
|
||||
# or integer not in unichr()'s range
|
||||
uc = name
|
||||
if encoding is None:
|
||||
return uc
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user