mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 16:04:01 +00:00
Merge remote-tracking branch 'origin/master' into xmliter-unicode
This commit is contained in:
commit
2f2c2e8096
@ -1,12 +1,11 @@
|
||||
import re
|
||||
import csv
|
||||
import logging
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO as BytesIO
|
||||
except ImportError:
|
||||
from io import BytesIO
|
||||
|
||||
from io import StringIO
|
||||
import six
|
||||
|
||||
from scrapy.http import TextResponse, Response
|
||||
@ -65,7 +64,7 @@ class _StreamReader(object):
|
||||
self._text, self.encoding = obj.body, obj.encoding
|
||||
else:
|
||||
self._text, self.encoding = obj, 'utf-8'
|
||||
self._is_unicode = isinstance(self._text, unicode)
|
||||
self._is_unicode = isinstance(self._text, six.text_type)
|
||||
|
||||
def read(self, n=65535):
|
||||
self.read = self._read_unicode if self._is_unicode else self._read_string
|
||||
@ -102,7 +101,11 @@ def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
|
||||
def _getrow(csv_r):
|
||||
return [to_unicode(field, encoding) for field in next(csv_r)]
|
||||
|
||||
lines = BytesIO(_body_or_str(obj, unicode=False))
|
||||
# Python 3 csv reader input object needs to return strings
|
||||
if six.PY3:
|
||||
lines = StringIO(_body_or_str(obj, unicode=True))
|
||||
else:
|
||||
lines = BytesIO(_body_or_str(obj, unicode=False))
|
||||
|
||||
kwargs = {}
|
||||
if delimiter: kwargs["delimiter"] = delimiter
|
||||
|
@ -16,7 +16,6 @@ tests/test_pipeline_files.py
|
||||
tests/test_pipeline_images.py
|
||||
tests/test_proxy_connect.py
|
||||
tests/test_spidermiddleware_httperror.py
|
||||
tests/test_utils_iterators.py
|
||||
tests/test_utils_template.py
|
||||
tests/test_webclient.py
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import six
|
||||
from twisted.trial import unittest
|
||||
|
||||
from scrapy.utils.iterators import csviter, xmliter, _body_or_str, xmliter_lxml
|
||||
@ -148,7 +149,7 @@ class XmliterTestCase(unittest.TestCase):
|
||||
body = b'<?xml version="1.0" encoding="ISO-8859-9"?>\n<xml>\n <item>Some Turkish Characters \xd6\xc7\xde\xdd\xd0\xdc \xfc\xf0\xfd\xfe\xe7\xf6</item>\n</xml>\n\n'
|
||||
response = XmlResponse('http://www.example.com', body=body)
|
||||
self.assertEqual(
|
||||
self.xmliter(response, 'item').next().extract(),
|
||||
next(self.xmliter(response, 'item')).extract(),
|
||||
u'<item>Some Turkish Characters \xd6\xc7\u015e\u0130\u011e\xdc \xfc\u011f\u0131\u015f\xe7\xf6</item>'
|
||||
)
|
||||
|
||||
@ -238,11 +239,11 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
|
||||
# explicit type check cuz' we no like stinkin' autocasting! yarrr
|
||||
for result_row in result:
|
||||
self.assert_(all((isinstance(k, unicode) for k in result_row.keys())))
|
||||
self.assert_(all((isinstance(v, unicode) for v in result_row.values())))
|
||||
self.assert_(all((isinstance(k, six.text_type) for k in result_row.keys())))
|
||||
self.assert_(all((isinstance(v, six.text_type) for v in result_row.values())))
|
||||
|
||||
def test_csviter_delimiter(self):
|
||||
body = get_testdata('feeds', 'feed-sample3.csv').replace(',', '\t')
|
||||
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
|
||||
response = TextResponse(url="http://example.com/", body=body)
|
||||
csv = csviter(response, delimiter='\t')
|
||||
|
||||
@ -254,7 +255,7 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
|
||||
def test_csviter_quotechar(self):
|
||||
body1 = get_testdata('feeds', 'feed-sample6.csv')
|
||||
body2 = get_testdata('feeds', 'feed-sample6.csv').replace(",", '|')
|
||||
body2 = get_testdata('feeds', 'feed-sample6.csv').replace(b',', b'|')
|
||||
|
||||
response1 = TextResponse(url="http://example.com/", body=body1)
|
||||
csv1 = csviter(response1, quotechar="'")
|
||||
@ -286,7 +287,7 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
|
||||
|
||||
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
|
||||
body = get_testdata('feeds', 'feed-sample3.csv').replace(',', '\t')
|
||||
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
|
||||
response = Response(url="http://example.com/", body=body)
|
||||
csv = csviter(response, delimiter='\t')
|
||||
|
||||
@ -298,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
|
||||
def test_csviter_headers(self):
|
||||
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
|
||||
headers, body = sample[0].split(','), '\n'.join(sample[1:])
|
||||
headers, body = sample[0].split(b','), b'\n'.join(sample[1:])
|
||||
|
||||
response = TextResponse(url="http://example.com/", body=body)
|
||||
csv = csviter(response, headers=headers)
|
||||
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
|
||||
|
||||
self.assertEqual([row for row in csv],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
@ -311,7 +312,7 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
|
||||
def test_csviter_falserow(self):
|
||||
body = get_testdata('feeds', 'feed-sample3.csv')
|
||||
body = '\n'.join((body, 'a,b', 'a,b,c,d'))
|
||||
body = b'\n'.join((body, b'a,b', b'a,b,c,d'))
|
||||
|
||||
response = TextResponse(url="http://example.com/", body=body)
|
||||
csv = csviter(response)
|
||||
|
Loading…
x
Reference in New Issue
Block a user