1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 07:24:09 +00:00

Merge pull request #1499 from scrapy/py3-port-exporters

[MRG+1] PY3 exporters
This commit is contained in:
Daniel Graña 2016-01-26 01:01:30 -03:00
commit a7b86137d0
4 changed files with 180 additions and 77 deletions

View File

@ -3,6 +3,7 @@ Item Exporters are used to export/serialize items into different formats.
""" """
import csv import csv
import io
import sys import sys
import pprint import pprint
import marshal import marshal
@ -11,7 +12,11 @@ from six.moves import cPickle as pickle
from xml.sax.saxutils import XMLGenerator from xml.sax.saxutils import XMLGenerator
from scrapy.utils.serialize import ScrapyJSONEncoder from scrapy.utils.serialize import ScrapyJSONEncoder
from scrapy.utils.python import to_bytes, to_unicode, to_native_str, is_listlike
from scrapy.item import BaseItem from scrapy.item import BaseItem
from scrapy.exceptions import ScrapyDeprecationWarning
import warnings
__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter', __all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter', 'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter',
@ -38,7 +43,7 @@ class BaseItemExporter(object):
raise NotImplementedError raise NotImplementedError
def serialize_field(self, field, name, value): def serialize_field(self, field, name, value):
serializer = field.get('serializer', self._to_str_if_unicode) serializer = field.get('serializer', lambda x: x)
return serializer(value) return serializer(value)
def start_exporting(self): def start_exporting(self):
@ -47,9 +52,6 @@ class BaseItemExporter(object):
def finish_exporting(self): def finish_exporting(self):
pass pass
def _to_str_if_unicode(self, value):
return value.encode(self.encoding) if isinstance(value, unicode) else value
def _get_serialized_fields(self, item, default_value=None, include_empty=None): def _get_serialized_fields(self, item, default_value=None, include_empty=None):
"""Return the fields to export as an iterable of tuples """Return the fields to export as an iterable of tuples
(name, serialized_value) (name, serialized_value)
@ -86,10 +88,10 @@ class JsonLinesItemExporter(BaseItemExporter):
def export_item(self, item): def export_item(self, item):
itemdict = dict(self._get_serialized_fields(item)) itemdict = dict(self._get_serialized_fields(item))
self.file.write(self.encoder.encode(itemdict) + '\n') self.file.write(to_bytes(self.encoder.encode(itemdict) + '\n'))
class JsonItemExporter(JsonLinesItemExporter): class JsonItemExporter(BaseItemExporter):
def __init__(self, file, **kwargs): def __init__(self, file, **kwargs):
self._configure(kwargs, dont_fail=True) self._configure(kwargs, dont_fail=True)
@ -98,18 +100,18 @@ class JsonItemExporter(JsonLinesItemExporter):
self.first_item = True self.first_item = True
def start_exporting(self): def start_exporting(self):
self.file.write("[") self.file.write(b"[")
def finish_exporting(self): def finish_exporting(self):
self.file.write("]") self.file.write(b"]")
def export_item(self, item): def export_item(self, item):
if self.first_item: if self.first_item:
self.first_item = False self.first_item = False
else: else:
self.file.write(',\n') self.file.write(b',\n')
itemdict = dict(self._get_serialized_fields(item)) itemdict = dict(self._get_serialized_fields(item))
self.file.write(self.encoder.encode(itemdict)) self.file.write(to_bytes(self.encoder.encode(itemdict)))
class XmlItemExporter(BaseItemExporter): class XmlItemExporter(BaseItemExporter):
@ -139,7 +141,7 @@ class XmlItemExporter(BaseItemExporter):
if hasattr(serialized_value, 'items'): if hasattr(serialized_value, 'items'):
for subname, value in serialized_value.items(): for subname, value in serialized_value.items():
self._export_xml_field(subname, value) self._export_xml_field(subname, value)
elif hasattr(serialized_value, '__iter__'): elif is_listlike(serialized_value):
for value in serialized_value: for value in serialized_value:
self._export_xml_field('value', value) self._export_xml_field('value', value)
else: else:
@ -153,10 +155,10 @@ class XmlItemExporter(BaseItemExporter):
# and Python 3.x will require unicode, so ">= 2.7.4" should be fine. # and Python 3.x will require unicode, so ">= 2.7.4" should be fine.
if sys.version_info[:3] >= (2, 7, 4): if sys.version_info[:3] >= (2, 7, 4):
def _xg_characters(self, serialized_value): def _xg_characters(self, serialized_value):
if not isinstance(serialized_value, unicode): if not isinstance(serialized_value, six.text_type):
serialized_value = serialized_value.decode(self.encoding) serialized_value = serialized_value.decode(self.encoding)
return self.xg.characters(serialized_value) return self.xg.characters(serialized_value)
else: else: # pragma: no cover
def _xg_characters(self, serialized_value): def _xg_characters(self, serialized_value):
return self.xg.characters(serialized_value) return self.xg.characters(serialized_value)
@ -166,17 +168,22 @@ class CsvItemExporter(BaseItemExporter):
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs): def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
self._configure(kwargs, dont_fail=True) self._configure(kwargs, dont_fail=True)
self.include_headers_line = include_headers_line self.include_headers_line = include_headers_line
file = file if six.PY2 else io.TextIOWrapper(file, line_buffering=True)
self.csv_writer = csv.writer(file, **kwargs) self.csv_writer = csv.writer(file, **kwargs)
self._headers_not_written = True self._headers_not_written = True
self._join_multivalued = join_multivalued self._join_multivalued = join_multivalued
def _to_str_if_unicode(self, value): def serialize_field(self, field, name, value):
serializer = field.get('serializer', self._join_if_needed)
return serializer(value)
def _join_if_needed(self, value):
if isinstance(value, (list, tuple)): if isinstance(value, (list, tuple)):
try: try:
value = self._join_multivalued.join(value) return self._join_multivalued.join(value)
except TypeError: # list in value may not contain strings except TypeError: # list in value may not contain strings
pass pass
return super(CsvItemExporter, self)._to_str_if_unicode(value) return value
def export_item(self, item): def export_item(self, item):
if self._headers_not_written: if self._headers_not_written:
@ -185,9 +192,16 @@ class CsvItemExporter(BaseItemExporter):
fields = self._get_serialized_fields(item, default_value='', fields = self._get_serialized_fields(item, default_value='',
include_empty=True) include_empty=True)
values = [x[1] for x in fields] values = list(self._build_row(x for _, x in fields))
self.csv_writer.writerow(values) self.csv_writer.writerow(values)
def _build_row(self, values):
for s in values:
try:
yield to_native_str(s)
except TypeError:
yield to_native_str(repr(s))
def _write_headers_and_set_fields_to_export(self, item): def _write_headers_and_set_fields_to_export(self, item):
if self.include_headers_line: if self.include_headers_line:
if not self.fields_to_export: if not self.fields_to_export:
@ -197,7 +211,8 @@ class CsvItemExporter(BaseItemExporter):
else: else:
# use fields declared in Item # use fields declared in Item
self.fields_to_export = list(item.fields.keys()) self.fields_to_export = list(item.fields.keys())
self.csv_writer.writerow(self.fields_to_export) row = list(self._build_row(self.fields_to_export))
self.csv_writer.writerow(row)
class PickleItemExporter(BaseItemExporter): class PickleItemExporter(BaseItemExporter):
@ -230,7 +245,7 @@ class PprintItemExporter(BaseItemExporter):
def export_item(self, item): def export_item(self, item):
itemdict = dict(self._get_serialized_fields(item)) itemdict = dict(self._get_serialized_fields(item))
self.file.write(pprint.pformat(itemdict) + '\n') self.file.write(to_bytes(pprint.pformat(itemdict) + '\n'))
class PythonItemExporter(BaseItemExporter): class PythonItemExporter(BaseItemExporter):
@ -239,6 +254,13 @@ class PythonItemExporter(BaseItemExporter):
json, msgpack, binc, etc) can be used on top of it. Its main goal is to json, msgpack, binc, etc) can be used on top of it. Its main goal is to
seamless support what BaseItemExporter does plus nested items. seamless support what BaseItemExporter does plus nested items.
""" """
def _configure(self, options, dont_fail=False):
self.binary = options.pop('binary', True)
super(PythonItemExporter, self)._configure(options, dont_fail)
if self.binary:
warnings.warn(
"PythonItemExporter will drop support for binary export in the future",
ScrapyDeprecationWarning)
def serialize_field(self, field, name, value): def serialize_field(self, field, name, value):
serializer = field.get('serializer', self._serialize_value) serializer = field.get('serializer', self._serialize_value)
@ -249,13 +271,20 @@ class PythonItemExporter(BaseItemExporter):
return self.export_item(value) return self.export_item(value)
if isinstance(value, dict): if isinstance(value, dict):
return dict(self._serialize_dict(value)) return dict(self._serialize_dict(value))
if hasattr(value, '__iter__'): if is_listlike(value):
return [self._serialize_value(v) for v in value] return [self._serialize_value(v) for v in value]
return self._to_str_if_unicode(value) if self.binary:
return to_bytes(value, encoding=self.encoding)
else:
return to_unicode(value, encoding=self.encoding)
def _serialize_dict(self, value): def _serialize_dict(self, value):
for key, val in six.iteritems(value): for key, val in six.iteritems(value):
key = to_bytes(key) if self.binary else key
yield key, self._serialize_value(val) yield key, self._serialize_value(val)
def export_item(self, item): def export_item(self, item):
return dict(self._get_serialized_fields(item)) result = dict(self._get_serialized_fields(item))
if self.binary:
result = dict(self._serialize_dict(result))
return result

View File

@ -1,4 +1,3 @@
tests/test_exporters.py
tests/test_linkextractors_deprecated.py tests/test_linkextractors_deprecated.py
tests/test_proxy_connect.py tests/test_proxy_connect.py

View File

@ -1,17 +1,21 @@
from __future__ import absolute_import from __future__ import absolute_import
import re import re
import json import json
import marshal
import tempfile
import unittest import unittest
from io import BytesIO from io import BytesIO
from six.moves import cPickle as pickle from six.moves import cPickle as pickle
import lxml.etree import lxml.etree
import six
from scrapy.item import Item, Field from scrapy.item import Item, Field
from scrapy.utils.python import to_unicode from scrapy.utils.python import to_unicode
from scrapy.exporters import ( from scrapy.exporters import (
BaseItemExporter, PprintItemExporter, PickleItemExporter, CsvItemExporter, BaseItemExporter, PprintItemExporter, PickleItemExporter, CsvItemExporter,
XmlItemExporter, JsonLinesItemExporter, JsonItemExporter, PythonItemExporter XmlItemExporter, JsonLinesItemExporter, JsonItemExporter,
PythonItemExporter, MarshalItemExporter
) )
@ -23,7 +27,7 @@ class TestItem(Item):
class BaseItemExporterTest(unittest.TestCase): class BaseItemExporterTest(unittest.TestCase):
def setUp(self): def setUp(self):
self.i = TestItem(name=u'John\xa3', age='22') self.i = TestItem(name=u'John\xa3', age=u'22')
self.output = BytesIO() self.output = BytesIO()
self.ie = self._get_exporter() self.ie = self._get_exporter()
@ -56,19 +60,19 @@ class BaseItemExporterTest(unittest.TestCase):
def test_serialize_field(self): def test_serialize_field(self):
res = self.ie.serialize_field(self.i.fields['name'], 'name', self.i['name']) res = self.ie.serialize_field(self.i.fields['name'], 'name', self.i['name'])
self.assertEqual(res, 'John\xc2\xa3') self.assertEqual(res, u'John\xa3')
res = self.ie.serialize_field(self.i.fields['age'], 'age', self.i['age']) res = self.ie.serialize_field(self.i.fields['age'], 'age', self.i['age'])
self.assertEqual(res, '22') self.assertEqual(res, u'22')
def test_fields_to_export(self): def test_fields_to_export(self):
ie = self._get_exporter(fields_to_export=['name']) ie = self._get_exporter(fields_to_export=['name'])
self.assertEqual(list(ie._get_serialized_fields(self.i)), [('name', 'John\xc2\xa3')]) self.assertEqual(list(ie._get_serialized_fields(self.i)), [('name', u'John\xa3')])
ie = self._get_exporter(fields_to_export=['name'], encoding='latin-1') ie = self._get_exporter(fields_to_export=['name'], encoding='latin-1')
name = list(ie._get_serialized_fields(self.i))[0][1] _, name = list(ie._get_serialized_fields(self.i))[0]
assert isinstance(name, str) assert isinstance(name, six.text_type)
self.assertEqual(name, 'John\xa3') self.assertEqual(name, u'John\xa3')
def test_field_custom_serializer(self): def test_field_custom_serializer(self):
def custom_serializer(value): def custom_serializer(value):
@ -78,16 +82,20 @@ class BaseItemExporterTest(unittest.TestCase):
name = Field() name = Field()
age = Field(serializer=custom_serializer) age = Field(serializer=custom_serializer)
i = CustomFieldItem(name=u'John\xa3', age='22') i = CustomFieldItem(name=u'John\xa3', age=u'22')
ie = self._get_exporter() ie = self._get_exporter()
self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), 'John\xc2\xa3') self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), u'John\xa3')
self.assertEqual(ie.serialize_field(i.fields['age'], 'age', i['age']), '24') self.assertEqual(ie.serialize_field(i.fields['age'], 'age', i['age']), '24')
class PythonItemExporterTest(BaseItemExporterTest): class PythonItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs): def _get_exporter(self, **kwargs):
return PythonItemExporter(**kwargs) return PythonItemExporter(binary=False, **kwargs)
def test_invalid_option(self):
with self.assertRaisesRegexp(TypeError, "Unexpected options: invalid_option"):
PythonItemExporter(invalid_option='something')
def test_nested_item(self): def test_nested_item(self):
i1 = TestItem(name=u'Joseph', age='22') i1 = TestItem(name=u'Joseph', age='22')
@ -120,6 +128,12 @@ class PythonItemExporterTest(BaseItemExporterTest):
self.assertEqual(type(exported['age'][0]), dict) self.assertEqual(type(exported['age'][0]), dict)
self.assertEqual(type(exported['age'][0]['age'][0]), dict) self.assertEqual(type(exported['age'][0]['age'][0]), dict)
def test_export_binary(self):
exporter = PythonItemExporter(binary=True)
value = TestItem(name=u'John\xa3', age=u'22')
expected = {b'name': b'John\xc2\xa3', b'age': b'22'}
self.assertEqual(expected, exporter.export_item(value))
class PprintItemExporterTest(BaseItemExporterTest): class PprintItemExporterTest(BaseItemExporterTest):
@ -152,18 +166,30 @@ class PickleItemExporterTest(BaseItemExporterTest):
self.assertEqual(pickle.load(f), i2) self.assertEqual(pickle.load(f), i2)
class CsvItemExporterTest(BaseItemExporterTest): class MarshalItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs):
self.output = tempfile.TemporaryFile()
return MarshalItemExporter(self.output, **kwargs)
def _check_output(self):
self.output.seek(0)
self._assert_expected_item(marshal.load(self.output))
class CsvItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs): def _get_exporter(self, **kwargs):
return CsvItemExporter(self.output, **kwargs) return CsvItemExporter(self.output, **kwargs)
def assertCsvEqual(self, first, second, msg=None): def assertCsvEqual(self, first, second, msg=None):
first = to_unicode(first)
second = to_unicode(second)
csvsplit = lambda csv: [sorted(re.split(r'(,|\s+)', line)) csvsplit = lambda csv: [sorted(re.split(r'(,|\s+)', line))
for line in csv.splitlines(True)] for line in csv.splitlines(True)]
return self.assertEqual(csvsplit(first), csvsplit(second), msg) return self.assertEqual(csvsplit(first), csvsplit(second), msg)
def _check_output(self): def _check_output(self):
self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n') self.assertCsvEqual(to_unicode(self.output.getvalue()), u'age,name\r\n22,John\xa3\r\n')
def assertExportResult(self, item, expected, **kwargs): def assertExportResult(self, item, expected, **kwargs):
fp = BytesIO() fp = BytesIO()
@ -177,13 +203,13 @@ class CsvItemExporterTest(BaseItemExporterTest):
self.assertExportResult( self.assertExportResult(
item=self.i, item=self.i,
fields_to_export=self.i.fields.keys(), fields_to_export=self.i.fields.keys(),
expected='age,name\r\n22,John\xc2\xa3\r\n', expected=b'age,name\r\n22,John\xc2\xa3\r\n',
) )
def test_header_export_all_dict(self): def test_header_export_all_dict(self):
self.assertExportResult( self.assertExportResult(
item=dict(self.i), item=dict(self.i),
expected='age,name\r\n22,John\xc2\xa3\r\n', expected=b'age,name\r\n22,John\xc2\xa3\r\n',
) )
def test_header_export_single_field(self): def test_header_export_single_field(self):
@ -191,7 +217,7 @@ class CsvItemExporterTest(BaseItemExporterTest):
self.assertExportResult( self.assertExportResult(
item=item, item=item,
fields_to_export=['age'], fields_to_export=['age'],
expected='age\r\n22\r\n', expected=b'age\r\n22\r\n',
) )
def test_header_export_two_items(self): def test_header_export_two_items(self):
@ -202,14 +228,15 @@ class CsvItemExporterTest(BaseItemExporterTest):
ie.export_item(item) ie.export_item(item)
ie.export_item(item) ie.export_item(item)
ie.finish_exporting() ie.finish_exporting()
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n') self.assertCsvEqual(output.getvalue(),
b'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
def test_header_no_header_line(self): def test_header_no_header_line(self):
for item in [self.i, dict(self.i)]: for item in [self.i, dict(self.i)]:
self.assertExportResult( self.assertExportResult(
item=item, item=item,
include_headers_line=False, include_headers_line=False,
expected='22,John\xc2\xa3\r\n', expected=b'22,John\xc2\xa3\r\n',
) )
def test_join_multivalue(self): def test_join_multivalue(self):
@ -224,6 +251,13 @@ class CsvItemExporterTest(BaseItemExporterTest):
expected='"Mary,Paul",John\r\n', expected='"Mary,Paul",John\r\n',
) )
def test_join_multivalue_not_strings(self):
self.assertExportResult(
item=dict(name='John', friends=[4, 8]),
include_headers_line=False,
expected='"[4, 8]",John\r\n',
)
class XmlItemExporterTest(BaseItemExporterTest): class XmlItemExporterTest(BaseItemExporterTest):
@ -252,13 +286,13 @@ class XmlItemExporterTest(BaseItemExporterTest):
self.assertXmlEquivalent(fp.getvalue(), expected_value) self.assertXmlEquivalent(fp.getvalue(), expected_value)
def _check_output(self): def _check_output(self):
expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>' expected_value = b'<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
self.assertXmlEquivalent(self.output.getvalue(), expected_value) self.assertXmlEquivalent(self.output.getvalue(), expected_value)
def test_multivalued_fields(self): def test_multivalued_fields(self):
self.assertExportResult( self.assertExportResult(
TestItem(name=[u'John\xa3', u'Doe']), TestItem(name=[u'John\xa3', u'Doe']),
'<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>' b'<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
) )
def test_nested_item(self): def test_nested_item(self):
@ -267,19 +301,19 @@ class XmlItemExporterTest(BaseItemExporterTest):
i3 = TestItem(name=u'buz', age=i2) i3 = TestItem(name=u'buz', age=i2)
self.assertExportResult(i3, self.assertExportResult(i3,
'<?xml version="1.0" encoding="utf-8"?>\n' b'<?xml version="1.0" encoding="utf-8"?>\n'
'<items>' b'<items>'
'<item>' b'<item>'
'<age>' b'<age>'
'<age>' b'<age>'
'<age>22</age>' b'<age>22</age>'
'<name>foo\xc2\xa3hoo</name>' b'<name>foo\xc2\xa3hoo</name>'
'</age>' b'</age>'
'<name>bar</name>' b'<name>bar</name>'
'</age>' b'</age>'
'<name>buz</name>' b'<name>buz</name>'
'</item>' b'</item>'
'</items>' b'</items>'
) )
def test_nested_list_item(self): def test_nested_list_item(self):
@ -288,16 +322,16 @@ class XmlItemExporterTest(BaseItemExporterTest):
i3 = TestItem(name=u'buz', age=[i1, i2]) i3 = TestItem(name=u'buz', age=[i1, i2])
self.assertExportResult(i3, self.assertExportResult(i3,
'<?xml version="1.0" encoding="utf-8"?>\n' b'<?xml version="1.0" encoding="utf-8"?>\n'
'<items>' b'<items>'
'<item>' b'<item>'
'<age>' b'<age>'
'<value><name>foo</name></value>' b'<value><name>foo</name></value>'
'<value><name>bar</name><v2><egg><value>spam</value></egg></v2></value>' b'<value><name>bar</name><v2><egg><value>spam</value></egg></v2></value>'
'</age>' b'</age>'
'<name>buz</name>' b'<name>buz</name>'
'</item>' b'</item>'
'</items>' b'</items>'
) )
@ -309,7 +343,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
return JsonLinesItemExporter(self.output, **kwargs) return JsonLinesItemExporter(self.output, **kwargs)
def _check_output(self): def _check_output(self):
exported = json.loads(self.output.getvalue().strip()) exported = json.loads(to_unicode(self.output.getvalue().strip()))
self.assertEqual(exported, dict(self.i)) self.assertEqual(exported, dict(self.i))
def test_nested_item(self): def test_nested_item(self):
@ -319,7 +353,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
self.ie.start_exporting() self.ie.start_exporting()
self.ie.export_item(i3) self.ie.export_item(i3)
self.ie.finish_exporting() self.ie.finish_exporting()
exported = json.loads(self.output.getvalue()) exported = json.loads(to_unicode(self.output.getvalue()))
self.assertEqual(exported, self._expected_nested) self.assertEqual(exported, self._expected_nested)
def test_extra_keywords(self): def test_extra_keywords(self):
@ -337,7 +371,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
return JsonItemExporter(self.output, **kwargs) return JsonItemExporter(self.output, **kwargs)
def _check_output(self): def _check_output(self):
exported = json.loads(self.output.getvalue().strip()) exported = json.loads(to_unicode(self.output.getvalue().strip()))
self.assertEqual(exported, [dict(self.i)]) self.assertEqual(exported, [dict(self.i)])
def assertTwoItemsExported(self, item): def assertTwoItemsExported(self, item):
@ -345,7 +379,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
self.ie.export_item(item) self.ie.export_item(item)
self.ie.export_item(item) self.ie.export_item(item)
self.ie.finish_exporting() self.ie.finish_exporting()
exported = json.loads(self.output.getvalue()) exported = json.loads(to_unicode(self.output.getvalue()))
self.assertEqual(exported, [dict(item), dict(item)]) self.assertEqual(exported, [dict(item), dict(item)])
def test_two_items(self): def test_two_items(self):
@ -361,7 +395,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
self.ie.start_exporting() self.ie.start_exporting()
self.ie.export_item(i3) self.ie.export_item(i3)
self.ie.finish_exporting() self.ie.finish_exporting()
exported = json.loads(self.output.getvalue()) exported = json.loads(to_unicode(self.output.getvalue()))
expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': dict(i1)}} expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': dict(i1)}}
self.assertEqual(exported, [expected]) self.assertEqual(exported, [expected])
@ -372,7 +406,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
self.ie.start_exporting() self.ie.start_exporting()
self.ie.export_item(i3) self.ie.export_item(i3)
self.ie.finish_exporting() self.ie.finish_exporting()
exported = json.loads(self.output.getvalue()) exported = json.loads(to_unicode(self.output.getvalue()))
expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': i1}} expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': i1}}
self.assertEqual(exported, [expected]) self.assertEqual(exported, [expected])

View File

@ -5,7 +5,6 @@ import json
from io import BytesIO from io import BytesIO
import tempfile import tempfile
import shutil import shutil
import six
from six.moves.urllib.parse import urlparse from six.moves.urllib.parse import urlparse
from zope.interface.verify import verifyObject from zope.interface.verify import verifyObject
@ -22,6 +21,7 @@ from scrapy.extensions.feedexport import (
S3FeedStorage, StdoutFeedStorage S3FeedStorage, StdoutFeedStorage
) )
from scrapy.utils.test import assert_aws_environ from scrapy.utils.test import assert_aws_environ
from scrapy.utils.python import to_native_str
class FileFeedStorageTest(unittest.TestCase): class FileFeedStorageTest(unittest.TestCase):
@ -120,8 +120,6 @@ class StdoutFeedStorageTest(unittest.TestCase):
class FeedExportTest(unittest.TestCase): class FeedExportTest(unittest.TestCase):
skip = not six.PY2
class MyItem(scrapy.Item): class MyItem(scrapy.Item):
foo = scrapy.Field() foo = scrapy.Field()
egg = scrapy.Field() egg = scrapy.Field()
@ -170,7 +168,7 @@ class FeedExportTest(unittest.TestCase):
settings.update({'FEED_FORMAT': 'csv'}) settings.update({'FEED_FORMAT': 'csv'})
data = yield self.exported_data(items, settings) data = yield self.exported_data(items, settings)
reader = csv.DictReader(data.splitlines()) reader = csv.DictReader(to_native_str(data).splitlines())
got_rows = list(reader) got_rows = list(reader)
if ordered: if ordered:
self.assertEqual(reader.fieldnames, header) self.assertEqual(reader.fieldnames, header)
@ -184,14 +182,57 @@ class FeedExportTest(unittest.TestCase):
settings = settings or {} settings = settings or {}
settings.update({'FEED_FORMAT': 'jl'}) settings.update({'FEED_FORMAT': 'jl'})
data = yield self.exported_data(items, settings) data = yield self.exported_data(items, settings)
parsed = [json.loads(line) for line in data.splitlines()] parsed = [json.loads(to_native_str(line)) for line in data.splitlines()]
rows = [{k: v for k, v in row.items() if v} for row in rows] rows = [{k: v for k, v in row.items() if v} for row in rows]
self.assertEqual(rows, parsed) self.assertEqual(rows, parsed)
@defer.inlineCallbacks
def assertExportedXml(self, items, rows, settings=None):
settings = settings or {}
settings.update({'FEED_FORMAT': 'xml'})
data = yield self.exported_data(items, settings)
rows = [{k: v for k, v in row.items() if v} for row in rows]
import lxml.etree
root = lxml.etree.fromstring(data)
got_rows = [{e.tag: e.text for e in it} for it in root.findall('item')]
self.assertEqual(rows, got_rows)
def _load_until_eof(self, data, load_func):
bytes_output = BytesIO(data)
result = []
while True:
try:
result.append(load_func(bytes_output))
except EOFError:
break
return result
@defer.inlineCallbacks
def assertExportedPickle(self, items, rows, settings=None):
settings = settings or {}
settings.update({'FEED_FORMAT': 'pickle'})
data = yield self.exported_data(items, settings)
expected = [{k: v for k, v in row.items() if v} for row in rows]
import pickle
result = self._load_until_eof(data, load_func=pickle.load)
self.assertEqual(expected, result)
@defer.inlineCallbacks
def assertExportedMarshal(self, items, rows, settings=None):
settings = settings or {}
settings.update({'FEED_FORMAT': 'marshal'})
data = yield self.exported_data(items, settings)
expected = [{k: v for k, v in row.items() if v} for row in rows]
import marshal
result = self._load_until_eof(data, load_func=marshal.load)
self.assertEqual(expected, result)
@defer.inlineCallbacks @defer.inlineCallbacks
def assertExported(self, items, header, rows, settings=None, ordered=True): def assertExported(self, items, header, rows, settings=None, ordered=True):
yield self.assertExportedCsv(items, header, rows, settings, ordered) yield self.assertExportedCsv(items, header, rows, settings, ordered)
yield self.assertExportedJsonLines(items, rows, settings) yield self.assertExportedJsonLines(items, rows, settings)
yield self.assertExportedXml(items, rows, settings)
yield self.assertExportedPickle(items, rows, settings)
@defer.inlineCallbacks @defer.inlineCallbacks
def test_export_items(self): def test_export_items(self):