1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 08:24:21 +00:00

Do not call body_as_unicode on non-text responses. Closes #462

This commit is contained in:
Daniel Graña 2013-11-19 20:13:34 -02:00
parent ec7833a910
commit 3f156ad845
3 changed files with 36 additions and 27 deletions

View File

@ -1,13 +1,14 @@
import os
from twisted.trial import unittest
from scrapy.utils.iterators import csviter, xmliter
from scrapy.utils.iterators import csviter, xmliter, _body_or_str
from scrapy.contrib_exp.iterators import xmliter_lxml
from scrapy.http import XmlResponse, TextResponse, Response
from scrapy.tests import get_testdata
FOOBAR_NL = u"foo" + os.linesep + u"bar"
class XmliterTestCase(unittest.TestCase):
xmliter = staticmethod(xmliter)
@ -173,7 +174,6 @@ class UtilsCsvTestCase(unittest.TestCase):
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_headers(self):
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
headers, body = sample[0].split(','), '\n'.join(sample[1:])
@ -229,5 +229,29 @@ class UtilsCsvTestCase(unittest.TestCase):
{u'id': u'2', u'name': u'something', u'value': u'\u255a\u2569\u2569\u2569\u2550\u2550\u2557'}])
class TestHelper(unittest.TestCase):
    """Exercise ``_body_or_str`` with raw strings and response objects."""

    # One payload in four forms: a byte string, its decoded unicode text,
    # and a TextResponse / base Response built from the same bytes.
    bbody = b'utf8-body'
    ubody = bbody.decode('utf8')
    txtresponse = TextResponse(url='http://example.org/', body=bbody, encoding='utf-8')
    response = Response(url='http://example.org/', body=bbody)

    def _assert_type_and_value(self, a, b, obj):
        """Assert that ``a`` has exactly the type of ``b`` and equals it.

        ``obj`` is the input that produced ``a``; it is only used to make
        the failure message identify the offending case.
        """
        got_type, expected_type = type(a), type(b)
        message = 'Got {}, expected {} for {!r}'.format(got_type, expected_type, obj)
        self.assertTrue(got_type is expected_type, message)
        self.assertEqual(a, b)

    def test_body_or_str(self):
        """Every input form yields unicode by default and bytes on request."""
        candidates = (self.bbody, self.ubody, self.txtresponse, self.response)
        for obj in candidates:
            # Default call and explicit unicode=True must both give the text.
            by_default = _body_or_str(obj)
            self._assert_type_and_value(by_default, self.ubody, obj)
            as_text = _body_or_str(obj, unicode=True)
            self._assert_type_and_value(as_text, self.ubody, obj)
            # unicode=False must give back the byte string.
            as_bytes = _body_or_str(obj, unicode=False)
            self._assert_type_and_value(as_bytes, self.bbody, obj)
            # The default matches unicode=True and differs from unicode=False.
            self.assertTrue(type(by_default) is type(as_text))
            self.assertTrue(type(by_default) is not type(as_bytes))
# Run the test cases in this module when it is executed directly as a script.
if __name__ == "__main__":
unittest.main()

View File

@ -3,33 +3,13 @@ import unittest
import urlparse
from scrapy.http import Response, TextResponse, HtmlResponse
from scrapy.utils.response import body_or_str, response_httprepr, open_in_browser, \
get_meta_refresh
from scrapy.utils.response import response_httprepr, open_in_browser, get_meta_refresh
__doctests__ = ['scrapy.utils.response']
class ResponseUtilsTest(unittest.TestCase):
dummy_response = TextResponse(url='http://example.org/', body='dummy_response')
def test_body_or_str_input(self):
    """A response or a string yields a basestring; other input raises."""
    for accepted in (self.dummy_response, 'text'):
        result = body_or_str(accepted)
        self.assertTrue(isinstance(result, basestring))
    # An integer is neither a response nor a string.
    self.assertRaises(Exception, body_or_str, 2)
def test_body_or_str_extraction(self):
    """The response body, or the string itself, comes back unchanged."""
    cases = (
        (self.dummy_response, 'dummy_response'),
        ('text', 'text'),
    )
    for source, expected in cases:
        self.assertEqual(body_or_str(source), expected)
def test_body_or_str_encoding(self):
    """The ``unicode`` flag selects ``str`` or ``unicode`` for every input."""
    for source in (self.dummy_response, 'text', u'text'):
        as_bytes = body_or_str(source, unicode=False)
        self.assertTrue(isinstance(as_bytes, str))
        as_text = body_or_str(source, unicode=True)
        self.assertTrue(isinstance(as_text, unicode))
def test_response_httprepr(self):
r1 = Response("http://www.example.com")
self.assertEqual(response_httprepr(r1), 'HTTP/1.1 200 OK\r\n\r\n')

View File

@ -71,8 +71,13 @@ def _body_or_str(obj, unicode=True):
assert isinstance(obj, (Response, basestring)), \
"obj must be Response or basestring, not %s" % type(obj).__name__
if isinstance(obj, Response):
return obj.body_as_unicode() if unicode else obj.body
elif isinstance(obj, str):
return obj.decode('utf-8') if unicode else obj
else:
if not unicode:
return obj.body
elif isinstance(obj, TextResponse):
return obj.body_as_unicode()
else:
return obj.body.decode('utf-8')
elif type(obj) is type(u''):
return obj if unicode else obj.encode('utf-8')
else:
return obj.decode('utf-8') if unicode else obj