mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 06:43:43 +00:00
added DEFAULT_RESPONSE_ENCODING setting
This commit is contained in:
parent
3d7a4c890e
commit
7296a7b889
@ -339,6 +339,17 @@ Default::
|
||||
The default headers used for Scrapy HTTP Requests. They're populated in the
|
||||
:class:`~scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware`.
|
||||
|
||||
.. setting:: DEFAULT_RESPONSE_ENCODING
|
||||
|
||||
DEFAULT_RESPONSE_ENCODING
|
||||
-------------------------
|
||||
|
||||
Default: ``'ascii'``
|
||||
|
||||
The default encoding to use for :class:`~scrapy.http.TextResponse` objects (and
|
||||
subclasses) when no encoding is declared and no encoding could be inferred from
|
||||
the body.
|
||||
|
||||
.. setting:: DEPTH_LIMIT
|
||||
|
||||
DEPTH_LIMIT
|
||||
|
@ -37,6 +37,8 @@ DEFAULT_REQUEST_HEADERS = {
|
||||
'Accept-Language': 'en',
|
||||
}
|
||||
|
||||
DEFAULT_RESPONSE_ENCODING = 'ascii'
|
||||
|
||||
DEPTH_LIMIT = 0
|
||||
DEPTH_STATS = True
|
||||
|
||||
|
@ -11,9 +11,11 @@ from scrapy.xlib.BeautifulSoup import UnicodeDammit
|
||||
|
||||
from scrapy.http.response import Response
|
||||
from scrapy.utils.python import memoizemethod_noargs
|
||||
from scrapy.conf import settings
|
||||
|
||||
class TextResponse(Response):
|
||||
|
||||
_DEFAULT_ENCODING = settings['DEFAULT_RESPONSE_ENCODING']
|
||||
_ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I)
|
||||
|
||||
__slots__ = ['_encoding', '_body_inferred_encoding']
|
||||
@ -71,6 +73,8 @@ class TextResponse(Response):
|
||||
self._body_declared_encoding())
|
||||
dammit = UnicodeDammit(self.body, possible_encodings)
|
||||
self._body_inferred_encoding = dammit.originalEncoding
|
||||
if self._body_inferred_encoding in ('ascii', None):
|
||||
self._body_inferred_encoding = self._DEFAULT_ENCODING
|
||||
return dammit.unicode
|
||||
|
||||
def body_encoding(self):
|
||||
|
@ -2,6 +2,7 @@ import unittest
|
||||
import weakref
|
||||
|
||||
from scrapy.http import Response, TextResponse, HtmlResponse, XmlResponse, Headers
|
||||
from scrapy.conf import settings
|
||||
|
||||
|
||||
class BaseResponseTest(unittest.TestCase):
|
||||
@ -138,8 +139,10 @@ class TextResponseTest(BaseResponseTest):
|
||||
self.assertEqual(r3.encoding, "latin1")
|
||||
|
||||
def test_unicode_url(self):
|
||||
# instantiate with unicode url without encoding
|
||||
self.assertRaises(TypeError, self.response_class, u"http://www.example.com/")
|
||||
# instantiate with unicode url without encoding (should set default encoding)
|
||||
resp = self.response_class(u"http://www.example.com/")
|
||||
self.assertEqual(resp.encoding, settings['DEFAULT_RESPONSE_ENCODING'])
|
||||
|
||||
# make sure urls are converted to str
|
||||
resp = self.response_class(url=u"http://www.example.com/", encoding='utf-8')
|
||||
assert isinstance(resp.url, str)
|
||||
@ -187,7 +190,6 @@ class TextResponseTest(BaseResponseTest):
|
||||
# TextResponse (and subclasses) must be passed an encoding when instantiating with unicode bodies
|
||||
self.assertRaises(TypeError, self.response_class, "http://www.example.com", body=u"\xa3")
|
||||
|
||||
|
||||
class HtmlResponseTest(TextResponseTest):
|
||||
|
||||
response_class = HtmlResponse
|
||||
@ -229,8 +231,7 @@ class XmlResponseTest(TextResponseTest):
|
||||
|
||||
body = "<xml></xml>"
|
||||
r1 = self.response_class("http://www.example.com", body=body)
|
||||
# XXX: we may want to switch default XmlResponse encoding to utf-8
|
||||
self._assert_response_values(r1, 'ascii', body)
|
||||
self._assert_response_values(r1, settings['DEFAULT_RESPONSE_ENCODING'], body)
|
||||
|
||||
body = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
|
||||
r2 = self.response_class("http://www.example.com", body=body)
|
||||
|
Loading…
x
Reference in New Issue
Block a user