1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 16:04:01 +00:00

Merge remote-tracking branch 'origin/master' into xmliter-unicode

This commit is contained in:
Paul Tremberth 2016-01-13 11:42:01 +01:00
commit 2f2c2e8096
3 changed files with 17 additions and 14 deletions

View File

@ -1,12 +1,11 @@
import re
import csv
import logging
try:
from cStringIO import StringIO as BytesIO
except ImportError:
from io import BytesIO
from io import StringIO
import six
from scrapy.http import TextResponse, Response
@ -65,7 +64,7 @@ class _StreamReader(object):
self._text, self.encoding = obj.body, obj.encoding
else:
self._text, self.encoding = obj, 'utf-8'
self._is_unicode = isinstance(self._text, unicode)
self._is_unicode = isinstance(self._text, six.text_type)
def read(self, n=65535):
self.read = self._read_unicode if self._is_unicode else self._read_string
@ -102,7 +101,11 @@ def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
def _getrow(csv_r):
return [to_unicode(field, encoding) for field in next(csv_r)]
lines = BytesIO(_body_or_str(obj, unicode=False))
# Python 3 csv reader input object needs to return strings
if six.PY3:
lines = StringIO(_body_or_str(obj, unicode=True))
else:
lines = BytesIO(_body_or_str(obj, unicode=False))
kwargs = {}
if delimiter: kwargs["delimiter"] = delimiter

View File

@ -16,7 +16,6 @@ tests/test_pipeline_files.py
tests/test_pipeline_images.py
tests/test_proxy_connect.py
tests/test_spidermiddleware_httperror.py
tests/test_utils_iterators.py
tests/test_utils_template.py
tests/test_webclient.py

View File

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import os
import six
from twisted.trial import unittest
from scrapy.utils.iterators import csviter, xmliter, _body_or_str, xmliter_lxml
@ -148,7 +149,7 @@ class XmliterTestCase(unittest.TestCase):
body = b'<?xml version="1.0" encoding="ISO-8859-9"?>\n<xml>\n <item>Some Turkish Characters \xd6\xc7\xde\xdd\xd0\xdc \xfc\xf0\xfd\xfe\xe7\xf6</item>\n</xml>\n\n'
response = XmlResponse('http://www.example.com', body=body)
self.assertEqual(
self.xmliter(response, 'item').next().extract(),
next(self.xmliter(response, 'item')).extract(),
u'<item>Some Turkish Characters \xd6\xc7\u015e\u0130\u011e\xdc \xfc\u011f\u0131\u015f\xe7\xf6</item>'
)
@ -238,11 +239,11 @@ class UtilsCsvTestCase(unittest.TestCase):
# explicit type check cuz' we no like stinkin' autocasting! yarrr
for result_row in result:
self.assert_(all((isinstance(k, unicode) for k in result_row.keys())))
self.assert_(all((isinstance(v, unicode) for v in result_row.values())))
self.assert_(all((isinstance(k, six.text_type) for k in result_row.keys())))
self.assert_(all((isinstance(v, six.text_type) for v in result_row.values())))
def test_csviter_delimiter(self):
body = get_testdata('feeds', 'feed-sample3.csv').replace(',', '\t')
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
response = TextResponse(url="http://example.com/", body=body)
csv = csviter(response, delimiter='\t')
@ -254,7 +255,7 @@ class UtilsCsvTestCase(unittest.TestCase):
def test_csviter_quotechar(self):
body1 = get_testdata('feeds', 'feed-sample6.csv')
body2 = get_testdata('feeds', 'feed-sample6.csv').replace(",", '|')
body2 = get_testdata('feeds', 'feed-sample6.csv').replace(b',', b'|')
response1 = TextResponse(url="http://example.com/", body=body1)
csv1 = csviter(response1, quotechar="'")
@ -286,7 +287,7 @@ class UtilsCsvTestCase(unittest.TestCase):
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
body = get_testdata('feeds', 'feed-sample3.csv').replace(',', '\t')
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
response = Response(url="http://example.com/", body=body)
csv = csviter(response, delimiter='\t')
@ -298,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase):
def test_csviter_headers(self):
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
headers, body = sample[0].split(','), '\n'.join(sample[1:])
headers, body = sample[0].split(b','), b'\n'.join(sample[1:])
response = TextResponse(url="http://example.com/", body=body)
csv = csviter(response, headers=headers)
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
self.assertEqual([row for row in csv],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
@ -311,7 +312,7 @@ class UtilsCsvTestCase(unittest.TestCase):
def test_csviter_falserow(self):
body = get_testdata('feeds', 'feed-sample3.csv')
body = '\n'.join((body, 'a,b', 'a,b,c,d'))
body = b'\n'.join((body, b'a,b', b'a,b,c,d'))
response = TextResponse(url="http://example.com/", body=body)
csv = csviter(response)