Do not call body_as_unicode on non-text responses. closes #462

2025-02-24 08:24:21 +00:00 · 2013-11-19 20:13:34 -02:00 · 2013-11-19 20:13:34 -02:00 · 3f156ad845
commit 3f156ad845
parent ec7833a910
3 changed files with 36 additions and 27 deletions
--- a/scrapy/tests/test_utils_iterators.py
+++ b/scrapy/tests/test_utils_iterators.py
@ -1,13 +1,14 @@
 import os
 from twisted.trial import unittest

-from scrapy.utils.iterators import csviter, xmliter
+from scrapy.utils.iterators import csviter, xmliter, _body_or_str
 from scrapy.contrib_exp.iterators import xmliter_lxml
 from scrapy.http import XmlResponse, TextResponse, Response
 from scrapy.tests import get_testdata

 FOOBAR_NL = u"foo" + os.linesep + u"bar"

+
 class XmliterTestCase(unittest.TestCase):

    xmliter = staticmethod(xmliter)
@ -173,7 +174,6 @@ class UtilsCsvTestCase(unittest.TestCase):
                          {u'id': u'3', u'name': u'multi',   u'value': FOOBAR_NL},
                          {u'id': u'4', u'name': u'empty',   u'value': u''}])

-
    def test_csviter_headers(self):
        sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
        headers, body = sample[0].split(','), '\n'.join(sample[1:])
@ -229,5 +229,29 @@ class UtilsCsvTestCase(unittest.TestCase):
             {u'id': u'2', u'name': u'something', u'value': u'\u255a\u2569\u2569\u2569\u2550\u2550\u2557'}])


+class TestHelper(unittest.TestCase):  # exercises the private _body_or_str helper
+    bbody = b'utf8-body'  # byte-string fixture
+    ubody = bbody.decode('utf8')  # the same text, decoded from UTF-8
+    txtresponse = TextResponse(url='http://example.org/', body=bbody, encoding='utf-8')  # text response, explicit encoding
+    response = Response(url='http://example.org/', body=bbody)  # plain Response built from the same bytes
+
+    def test_body_or_str(self):  # every supported input kind must give the same results
+        for obj in (self.bbody, self.ubody, self.txtresponse, self.response):
+            r1 = _body_or_str(obj)  # default call, no unicode argument
+            self._assert_type_and_value(r1, self.ubody, obj)  # default must match ubody in type and value
+            r2 = _body_or_str(obj, unicode=True)
+            self._assert_type_and_value(r2, self.ubody, obj)  # explicit unicode=True behaves like the default
+            r3 = _body_or_str(obj, unicode=False)
+            self._assert_type_and_value(r3, self.bbody, obj)  # unicode=False must match bbody in type and value
+            self.assertTrue(type(r1) is type(r2))  # default and unicode=True agree on type
+            self.assertTrue(type(r1) is not type(r3))  # unicode=False yields a different type
+
+
+    def _assert_type_and_value(self, a, b, obj):  # exact type match plus equality; obj only feeds the failure message
+        self.assertTrue(type(a) is type(b),
+                        'Got {}, expected {} for {!r}'.format(type(a), type(b), obj))
+        self.assertEqual(a, b)
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/scrapy/tests/test_utils_response.py
+++ b/scrapy/tests/test_utils_response.py
@ -3,33 +3,13 @@ import unittest
 import urlparse

 from scrapy.http import Response, TextResponse, HtmlResponse
-from scrapy.utils.response import body_or_str, response_httprepr, open_in_browser, \
-    get_meta_refresh
+from scrapy.utils.response import response_httprepr, open_in_browser, get_meta_refresh

 __doctests__ = ['scrapy.utils.response']

 class ResponseUtilsTest(unittest.TestCase):
    dummy_response = TextResponse(url='http://example.org/', body='dummy_response')

-    def test_body_or_str_input(self):
-        self.assertTrue(isinstance(body_or_str(self.dummy_response), basestring))
-        self.assertTrue(isinstance(body_or_str('text'), basestring))
-        self.assertRaises(Exception, body_or_str, 2)
-
-    def test_body_or_str_extraction(self):
-        self.assertEqual(body_or_str(self.dummy_response), 'dummy_response')
-        self.assertEqual(body_or_str('text'), 'text')
-
-    def test_body_or_str_encoding(self):
-        self.assertTrue(isinstance(body_or_str(self.dummy_response, unicode=False), str))
-        self.assertTrue(isinstance(body_or_str(self.dummy_response, unicode=True), unicode))
-
-        self.assertTrue(isinstance(body_or_str('text', unicode=False), str))
-        self.assertTrue(isinstance(body_or_str('text', unicode=True), unicode))
-
-        self.assertTrue(isinstance(body_or_str(u'text', unicode=False), str))
-        self.assertTrue(isinstance(body_or_str(u'text', unicode=True), unicode))
-
    def test_response_httprepr(self):
        r1 = Response("http://www.example.com")
        self.assertEqual(response_httprepr(r1), 'HTTP/1.1 200 OK\r\n\r\n')
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@ -71,8 +71,13 @@ def _body_or_str(obj, unicode=True):
    assert isinstance(obj, (Response, basestring)), \
        "obj must be Response or basestring, not %s" % type(obj).__name__
    if isinstance(obj, Response):
-        return obj.body_as_unicode() if unicode else obj.body
-    elif isinstance(obj, str):
-        return obj.decode('utf-8') if unicode else obj
-    else:
+        if not unicode:
+            return obj.body
+        elif isinstance(obj, TextResponse):
+            return obj.body_as_unicode()
+        else:
+            return obj.body.decode('utf-8')
+    elif type(obj) is type(u''):
        return obj if unicode else obj.encode('utf-8')
+    else:
+        return obj.decode('utf-8') if unicode else obj