Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-25 00:43:59 +00:00)

Merge pull request #1499 from scrapy/py3-port-exporters

[MRG+1] PY3 exporters

Commit a7b86137d0
@@ -3,6 +3,7 @@ Item Exporters are used to export/serialize items into different formats.
 """
 
 import csv
+import io
 import sys
 import pprint
 import marshal
@@ -11,7 +12,11 @@ from six.moves import cPickle as pickle
 from xml.sax.saxutils import XMLGenerator
 
 from scrapy.utils.serialize import ScrapyJSONEncoder
+from scrapy.utils.python import to_bytes, to_unicode, to_native_str, is_listlike
 from scrapy.item import BaseItem
+from scrapy.exceptions import ScrapyDeprecationWarning
+import warnings
+
 
 __all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
            'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter',
@@ -38,7 +43,7 @@ class BaseItemExporter(object):
         raise NotImplementedError
 
     def serialize_field(self, field, name, value):
-        serializer = field.get('serializer', self._to_str_if_unicode)
+        serializer = field.get('serializer', lambda x: x)
         return serializer(value)
 
     def start_exporting(self):
@@ -47,9 +52,6 @@ class BaseItemExporter(object):
     def finish_exporting(self):
         pass
 
-    def _to_str_if_unicode(self, value):
-        return value.encode(self.encoding) if isinstance(value, unicode) else value
-
     def _get_serialized_fields(self, item, default_value=None, include_empty=None):
         """Return the fields to export as an iterable of tuples
         (name, serialized_value)
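Note: with _to_str_if_unicode gone, the base exporter no longer encodes field values itself; the default serializer is now the identity, and encoding is left to the concrete exporters. A minimal sketch of the resulting behaviour (the Product item below is hypothetical, used only for illustration):

from scrapy.item import Item, Field
from scrapy.exporters import BaseItemExporter

class Product(Item):
    # hypothetical item, only used to illustrate serialize_field
    name = Field()
    price = Field(serializer=lambda v: '$%s' % v)

exporter = BaseItemExporter()
item = Product(name=u'John\xa3', price=10)

# the default serializer is now the identity, so unicode passes through unchanged
assert exporter.serialize_field(item.fields['name'], 'name', item['name']) == u'John\xa3'
# a serializer declared on the Field is still applied
assert exporter.serialize_field(item.fields['price'], 'price', item['price']) == '$10'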
@@ -86,10 +88,10 @@ class JsonLinesItemExporter(BaseItemExporter):
 
     def export_item(self, item):
         itemdict = dict(self._get_serialized_fields(item))
-        self.file.write(self.encoder.encode(itemdict) + '\n')
+        self.file.write(to_bytes(self.encoder.encode(itemdict) + '\n'))
 
 
-class JsonItemExporter(JsonLinesItemExporter):
+class JsonItemExporter(BaseItemExporter):
 
     def __init__(self, file, **kwargs):
         self._configure(kwargs, dont_fail=True)
@@ -98,18 +100,18 @@ class JsonItemExporter(JsonLinesItemExporter):
         self.first_item = True
 
     def start_exporting(self):
-        self.file.write("[")
+        self.file.write(b"[")
 
     def finish_exporting(self):
-        self.file.write("]")
+        self.file.write(b"]")
 
     def export_item(self, item):
         if self.first_item:
             self.first_item = False
         else:
-            self.file.write(',\n')
+            self.file.write(b',\n')
         itemdict = dict(self._get_serialized_fields(item))
-        self.file.write(self.encoder.encode(itemdict))
+        self.file.write(to_bytes(self.encoder.encode(itemdict)))
 
 
 class XmlItemExporter(BaseItemExporter):
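Note: both JSON exporters now call to_bytes() before writing (and JsonItemExporter writes b"[" / b"]" / b',\n' directly), so the file passed in must be opened in binary mode. A quick usage sketch; the exact bytes shown assume ScrapyJSONEncoder keeps json's default ASCII escaping:

from io import BytesIO
from scrapy.exporters import JsonItemExporter

buf = BytesIO()                               # binary file-like object, e.g. open('items.json', 'wb')
exporter = JsonItemExporter(buf)
exporter.start_exporting()
exporter.export_item({'name': u'John\xa3'})   # dict items are supported too
exporter.finish_exporting()
print(buf.getvalue())                         # expected: b'[{"name": "John\\u00a3"}]'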
@@ -139,7 +141,7 @@ class XmlItemExporter(BaseItemExporter):
         if hasattr(serialized_value, 'items'):
             for subname, value in serialized_value.items():
                 self._export_xml_field(subname, value)
-        elif hasattr(serialized_value, '__iter__'):
+        elif is_listlike(serialized_value):
             for value in serialized_value:
                 self._export_xml_field('value', value)
         else:
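Note: hasattr(value, '__iter__') is no longer a safe "is it a sequence?" test, because str has __iter__ on Python 3; is_listlike() excludes text and bytes. A quick sketch, assuming scrapy.utils.python.is_listlike keeps that behaviour:

from scrapy.utils.python import is_listlike

assert is_listlike(['a', 'b'])        # lists/tuples are exported element by element
assert is_listlike(('a', 'b'))
assert not is_listlike(u'ab')         # text stays a single <value>
assert not is_listlike(b'ab')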
@@ -153,10 +155,10 @@ class XmlItemExporter(BaseItemExporter):
     # and Python 3.x will require unicode, so ">= 2.7.4" should be fine.
     if sys.version_info[:3] >= (2, 7, 4):
         def _xg_characters(self, serialized_value):
-            if not isinstance(serialized_value, unicode):
+            if not isinstance(serialized_value, six.text_type):
                 serialized_value = serialized_value.decode(self.encoding)
             return self.xg.characters(serialized_value)
-    else:
+    else:  # pragma: no cover
         def _xg_characters(self, serialized_value):
             return self.xg.characters(serialized_value)
 
@@ -166,17 +168,22 @@ class CsvItemExporter(BaseItemExporter):
     def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
         self._configure(kwargs, dont_fail=True)
         self.include_headers_line = include_headers_line
+        file = file if six.PY2 else io.TextIOWrapper(file, line_buffering=True)
         self.csv_writer = csv.writer(file, **kwargs)
         self._headers_not_written = True
         self._join_multivalued = join_multivalued
 
-    def _to_str_if_unicode(self, value):
+    def serialize_field(self, field, name, value):
+        serializer = field.get('serializer', self._join_if_needed)
+        return serializer(value)
+
+    def _join_if_needed(self, value):
         if isinstance(value, (list, tuple)):
             try:
-                value = self._join_multivalued.join(value)
+                return self._join_multivalued.join(value)
             except TypeError:  # list in value may not contain strings
                 pass
-        return super(CsvItemExporter, self)._to_str_if_unicode(value)
+        return value
 
     def export_item(self, item):
         if self._headers_not_written:
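Note: csv.writer wants a text stream on Python 3, so the exporter now wraps the binary output file in io.TextIOWrapper (and keeps the raw file on Python 2). A minimal sketch of that pattern outside the exporter:

import csv
import io
import six

def make_csv_writer(binary_file, **kwargs):
    # the same trick CsvItemExporter now uses in __init__
    stream = binary_file if six.PY2 else io.TextIOWrapper(binary_file, line_buffering=True)
    return csv.writer(stream, **kwargs)

buf = io.BytesIO()
writer = make_csv_writer(buf)        # keep a reference so the wrapper stays alive
writer.writerow(['age', 'name'])
print(buf.getvalue())                # b'age,name\r\n' on a platform where os.linesep is '\n'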
@@ -185,9 +192,16 @@ class CsvItemExporter(BaseItemExporter):
 
         fields = self._get_serialized_fields(item, default_value='',
                                              include_empty=True)
-        values = [x[1] for x in fields]
+        values = list(self._build_row(x for _, x in fields))
         self.csv_writer.writerow(values)
 
+    def _build_row(self, values):
+        for s in values:
+            try:
+                yield to_native_str(s)
+            except TypeError:
+                yield to_native_str(repr(s))
+
     def _write_headers_and_set_fields_to_export(self, item):
         if self.include_headers_line:
             if not self.fields_to_export:
@@ -197,7 +211,8 @@ class CsvItemExporter(BaseItemExporter):
             else:
                 # use fields declared in Item
                 self.fields_to_export = list(item.fields.keys())
-            self.csv_writer.writerow(self.fields_to_export)
+            row = list(self._build_row(self.fields_to_export))
+            self.csv_writer.writerow(row)
 
 
 class PickleItemExporter(BaseItemExporter):
@@ -230,7 +245,7 @@ class PprintItemExporter(BaseItemExporter):
 
     def export_item(self, item):
         itemdict = dict(self._get_serialized_fields(item))
-        self.file.write(pprint.pformat(itemdict) + '\n')
+        self.file.write(to_bytes(pprint.pformat(itemdict) + '\n'))
 
 
 class PythonItemExporter(BaseItemExporter):
@@ -239,6 +254,13 @@ class PythonItemExporter(BaseItemExporter):
     json, msgpack, binc, etc) can be used on top of it. Its main goal is to
     seamless support what BaseItemExporter does plus nested items.
     """
+    def _configure(self, options, dont_fail=False):
+        self.binary = options.pop('binary', True)
+        super(PythonItemExporter, self)._configure(options, dont_fail)
+        if self.binary:
+            warnings.warn(
+                "PythonItemExporter will drop support for binary export in the future",
+                ScrapyDeprecationWarning)
 
     def serialize_field(self, field, name, value):
         serializer = field.get('serializer', self._serialize_value)
@@ -249,13 +271,20 @@ class PythonItemExporter(BaseItemExporter):
             return self.export_item(value)
         if isinstance(value, dict):
             return dict(self._serialize_dict(value))
-        if hasattr(value, '__iter__'):
+        if is_listlike(value):
             return [self._serialize_value(v) for v in value]
-        return self._to_str_if_unicode(value)
+        if self.binary:
+            return to_bytes(value, encoding=self.encoding)
+        else:
+            return to_unicode(value, encoding=self.encoding)
 
     def _serialize_dict(self, value):
         for key, val in six.iteritems(value):
+            key = to_bytes(key) if self.binary else key
             yield key, self._serialize_value(val)
 
     def export_item(self, item):
-        return dict(self._get_serialized_fields(item))
+        result = dict(self._get_serialized_fields(item))
+        if self.binary:
+            result = dict(self._serialize_dict(result))
+        return result
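Note: PythonItemExporter grows a binary option (default True, which now triggers a deprecation warning); with binary=True keys and values come out as bytes, with binary=False as text. A sketch matching the new test_export_binary expectation further down:

from scrapy.exporters import PythonItemExporter

item = {'name': u'John\xa3', 'age': u'22'}

binary_exporter = PythonItemExporter(binary=True)    # warns about future removal of binary export
assert binary_exporter.export_item(item) == {b'name': b'John\xc2\xa3', b'age': b'22'}

text_exporter = PythonItemExporter(binary=False)
assert text_exporter.export_item(item) == {'name': u'John\xa3', 'age': u'22'}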
@@ -1,4 +1,3 @@
-tests/test_exporters.py
 tests/test_linkextractors_deprecated.py
 tests/test_proxy_connect.py
@@ -1,17 +1,21 @@
+from __future__ import absolute_import
 import re
 import json
+import marshal
+import tempfile
 import unittest
 from io import BytesIO
 from six.moves import cPickle as pickle
 
 import lxml.etree
+import six
 
 from scrapy.item import Item, Field
+from scrapy.utils.python import to_unicode
 from scrapy.exporters import (
     BaseItemExporter, PprintItemExporter, PickleItemExporter, CsvItemExporter,
-    XmlItemExporter, JsonLinesItemExporter, JsonItemExporter, PythonItemExporter
+    XmlItemExporter, JsonLinesItemExporter, JsonItemExporter,
+    PythonItemExporter, MarshalItemExporter
 )
 
 
@@ -23,7 +27,7 @@ class TestItem(Item):
 class BaseItemExporterTest(unittest.TestCase):
 
     def setUp(self):
-        self.i = TestItem(name=u'John\xa3', age='22')
+        self.i = TestItem(name=u'John\xa3', age=u'22')
         self.output = BytesIO()
         self.ie = self._get_exporter()
 
@@ -56,19 +60,19 @@ class BaseItemExporterTest(unittest.TestCase):
 
     def test_serialize_field(self):
         res = self.ie.serialize_field(self.i.fields['name'], 'name', self.i['name'])
-        self.assertEqual(res, 'John\xc2\xa3')
+        self.assertEqual(res, u'John\xa3')
 
         res = self.ie.serialize_field(self.i.fields['age'], 'age', self.i['age'])
-        self.assertEqual(res, '22')
+        self.assertEqual(res, u'22')
 
     def test_fields_to_export(self):
         ie = self._get_exporter(fields_to_export=['name'])
-        self.assertEqual(list(ie._get_serialized_fields(self.i)), [('name', 'John\xc2\xa3')])
+        self.assertEqual(list(ie._get_serialized_fields(self.i)), [('name', u'John\xa3')])
 
         ie = self._get_exporter(fields_to_export=['name'], encoding='latin-1')
-        name = list(ie._get_serialized_fields(self.i))[0][1]
-        assert isinstance(name, str)
-        self.assertEqual(name, 'John\xa3')
+        _, name = list(ie._get_serialized_fields(self.i))[0]
+        assert isinstance(name, six.text_type)
+        self.assertEqual(name, u'John\xa3')
 
     def test_field_custom_serializer(self):
         def custom_serializer(value):
@@ -78,16 +82,20 @@ class BaseItemExporterTest(unittest.TestCase):
             name = Field()
             age = Field(serializer=custom_serializer)
 
-        i = CustomFieldItem(name=u'John\xa3', age='22')
+        i = CustomFieldItem(name=u'John\xa3', age=u'22')
 
         ie = self._get_exporter()
-        self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), 'John\xc2\xa3')
+        self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), u'John\xa3')
         self.assertEqual(ie.serialize_field(i.fields['age'], 'age', i['age']), '24')
 
 
 class PythonItemExporterTest(BaseItemExporterTest):
     def _get_exporter(self, **kwargs):
-        return PythonItemExporter(**kwargs)
+        return PythonItemExporter(binary=False, **kwargs)
 
+    def test_invalid_option(self):
+        with self.assertRaisesRegexp(TypeError, "Unexpected options: invalid_option"):
+            PythonItemExporter(invalid_option='something')
+
     def test_nested_item(self):
         i1 = TestItem(name=u'Joseph', age='22')
@@ -120,6 +128,12 @@ class PythonItemExporterTest(BaseItemExporterTest):
         self.assertEqual(type(exported['age'][0]), dict)
         self.assertEqual(type(exported['age'][0]['age'][0]), dict)
 
+    def test_export_binary(self):
+        exporter = PythonItemExporter(binary=True)
+        value = TestItem(name=u'John\xa3', age=u'22')
+        expected = {b'name': b'John\xc2\xa3', b'age': b'22'}
+        self.assertEqual(expected, exporter.export_item(value))
+
 
 class PprintItemExporterTest(BaseItemExporterTest):
 
@@ -152,18 +166,30 @@ class PickleItemExporterTest(BaseItemExporterTest):
         self.assertEqual(pickle.load(f), i2)
 
 
-class CsvItemExporterTest(BaseItemExporterTest):
+class MarshalItemExporterTest(BaseItemExporterTest):
 
     def _get_exporter(self, **kwargs):
+        self.output = tempfile.TemporaryFile()
+        return MarshalItemExporter(self.output, **kwargs)
+
+    def _check_output(self):
+        self.output.seek(0)
+        self._assert_expected_item(marshal.load(self.output))
+
+
+class CsvItemExporterTest(BaseItemExporterTest):
+    def _get_exporter(self, **kwargs):
         return CsvItemExporter(self.output, **kwargs)
 
     def assertCsvEqual(self, first, second, msg=None):
+        first = to_unicode(first)
+        second = to_unicode(second)
         csvsplit = lambda csv: [sorted(re.split(r'(,|\s+)', line))
                                 for line in csv.splitlines(True)]
         return self.assertEqual(csvsplit(first), csvsplit(second), msg)
 
     def _check_output(self):
-        self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
+        self.assertCsvEqual(to_unicode(self.output.getvalue()), u'age,name\r\n22,John\xa3\r\n')
 
     def assertExportResult(self, item, expected, **kwargs):
         fp = BytesIO()
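Note: the new MarshalItemExporterTest above round-trips items through the stdlib marshal module; each exported item can be read back with marshal.load from the same (binary) file. A small sketch mirroring the test's use of tempfile.TemporaryFile:

import marshal
import tempfile

from scrapy.exporters import MarshalItemExporter

with tempfile.TemporaryFile() as f:
    exporter = MarshalItemExporter(f)
    exporter.start_exporting()
    exporter.export_item({'name': u'John\xa3', 'age': u'22'})
    exporter.finish_exporting()

    f.seek(0)
    assert marshal.load(f) == {'name': u'John\xa3', 'age': u'22'}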
@@ -177,13 +203,13 @@ class CsvItemExporterTest(BaseItemExporterTest):
         self.assertExportResult(
             item=self.i,
             fields_to_export=self.i.fields.keys(),
-            expected='age,name\r\n22,John\xc2\xa3\r\n',
+            expected=b'age,name\r\n22,John\xc2\xa3\r\n',
         )
 
     def test_header_export_all_dict(self):
         self.assertExportResult(
             item=dict(self.i),
-            expected='age,name\r\n22,John\xc2\xa3\r\n',
+            expected=b'age,name\r\n22,John\xc2\xa3\r\n',
         )
 
     def test_header_export_single_field(self):
@@ -191,7 +217,7 @@ class CsvItemExporterTest(BaseItemExporterTest):
         self.assertExportResult(
             item=item,
             fields_to_export=['age'],
-            expected='age\r\n22\r\n',
+            expected=b'age\r\n22\r\n',
         )
 
     def test_header_export_two_items(self):
@@ -202,14 +228,15 @@ class CsvItemExporterTest(BaseItemExporterTest):
             ie.export_item(item)
             ie.export_item(item)
             ie.finish_exporting()
-            self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
+            self.assertCsvEqual(output.getvalue(),
+                                b'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
 
     def test_header_no_header_line(self):
         for item in [self.i, dict(self.i)]:
             self.assertExportResult(
                 item=item,
                 include_headers_line=False,
-                expected='22,John\xc2\xa3\r\n',
+                expected=b'22,John\xc2\xa3\r\n',
             )
 
     def test_join_multivalue(self):
@@ -224,6 +251,13 @@ class CsvItemExporterTest(BaseItemExporterTest):
             expected='"Mary,Paul",John\r\n',
         )
 
+    def test_join_multivalue_not_strings(self):
+        self.assertExportResult(
+            item=dict(name='John', friends=[4, 8]),
+            include_headers_line=False,
+            expected='"[4, 8]",John\r\n',
+        )
+
 
 class XmlItemExporterTest(BaseItemExporterTest):
 
@@ -252,13 +286,13 @@ class XmlItemExporterTest(BaseItemExporterTest):
         self.assertXmlEquivalent(fp.getvalue(), expected_value)
 
     def _check_output(self):
-        expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
+        expected_value = b'<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
         self.assertXmlEquivalent(self.output.getvalue(), expected_value)
 
     def test_multivalued_fields(self):
         self.assertExportResult(
             TestItem(name=[u'John\xa3', u'Doe']),
-            '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
+            b'<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
         )
 
     def test_nested_item(self):
@@ -267,19 +301,19 @@ class XmlItemExporterTest(BaseItemExporterTest):
         i3 = TestItem(name=u'buz', age=i2)
 
         self.assertExportResult(i3,
-            '<?xml version="1.0" encoding="utf-8"?>\n'
-            '<items>'
-                '<item>'
-                    '<age>'
-                        '<age>'
-                            '<age>22</age>'
-                            '<name>foo\xc2\xa3hoo</name>'
-                        '</age>'
-                        '<name>bar</name>'
-                    '</age>'
-                    '<name>buz</name>'
-                '</item>'
-            '</items>'
+            b'<?xml version="1.0" encoding="utf-8"?>\n'
+            b'<items>'
+                b'<item>'
+                    b'<age>'
+                        b'<age>'
+                            b'<age>22</age>'
+                            b'<name>foo\xc2\xa3hoo</name>'
+                        b'</age>'
+                        b'<name>bar</name>'
+                    b'</age>'
+                    b'<name>buz</name>'
+                b'</item>'
+            b'</items>'
         )
 
     def test_nested_list_item(self):
@@ -288,16 +322,16 @@ class XmlItemExporterTest(BaseItemExporterTest):
         i3 = TestItem(name=u'buz', age=[i1, i2])
 
         self.assertExportResult(i3,
-            '<?xml version="1.0" encoding="utf-8"?>\n'
-            '<items>'
-                '<item>'
-                    '<age>'
-                        '<value><name>foo</name></value>'
-                        '<value><name>bar</name><v2><egg><value>spam</value></egg></v2></value>'
-                    '</age>'
-                    '<name>buz</name>'
-                '</item>'
-            '</items>'
+            b'<?xml version="1.0" encoding="utf-8"?>\n'
+            b'<items>'
+                b'<item>'
+                    b'<age>'
+                        b'<value><name>foo</name></value>'
+                        b'<value><name>bar</name><v2><egg><value>spam</value></egg></v2></value>'
+                    b'</age>'
+                    b'<name>buz</name>'
+                b'</item>'
+            b'</items>'
         )
 
 
@@ -309,7 +343,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
         return JsonLinesItemExporter(self.output, **kwargs)
 
     def _check_output(self):
-        exported = json.loads(self.output.getvalue().strip())
+        exported = json.loads(to_unicode(self.output.getvalue().strip()))
         self.assertEqual(exported, dict(self.i))
 
     def test_nested_item(self):
@@ -319,7 +353,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
         self.ie.start_exporting()
         self.ie.export_item(i3)
         self.ie.finish_exporting()
-        exported = json.loads(self.output.getvalue())
+        exported = json.loads(to_unicode(self.output.getvalue()))
         self.assertEqual(exported, self._expected_nested)
 
     def test_extra_keywords(self):
@@ -337,7 +371,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
         return JsonItemExporter(self.output, **kwargs)
 
     def _check_output(self):
-        exported = json.loads(self.output.getvalue().strip())
+        exported = json.loads(to_unicode(self.output.getvalue().strip()))
         self.assertEqual(exported, [dict(self.i)])
 
     def assertTwoItemsExported(self, item):
@@ -345,7 +379,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
         self.ie.export_item(item)
         self.ie.export_item(item)
         self.ie.finish_exporting()
-        exported = json.loads(self.output.getvalue())
+        exported = json.loads(to_unicode(self.output.getvalue()))
         self.assertEqual(exported, [dict(item), dict(item)])
 
     def test_two_items(self):
@@ -361,7 +395,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
         self.ie.start_exporting()
         self.ie.export_item(i3)
         self.ie.finish_exporting()
-        exported = json.loads(self.output.getvalue())
+        exported = json.loads(to_unicode(self.output.getvalue()))
         expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': dict(i1)}}
         self.assertEqual(exported, [expected])
 
@@ -372,7 +406,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
         self.ie.start_exporting()
         self.ie.export_item(i3)
         self.ie.finish_exporting()
-        exported = json.loads(self.output.getvalue())
+        exported = json.loads(to_unicode(self.output.getvalue()))
         expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': i1}}
         self.assertEqual(exported, [expected])
 
@@ -5,7 +5,6 @@ import json
 from io import BytesIO
 import tempfile
 import shutil
-import six
 from six.moves.urllib.parse import urlparse
 
 from zope.interface.verify import verifyObject
@@ -22,6 +21,7 @@ from scrapy.extensions.feedexport import (
     S3FeedStorage, StdoutFeedStorage
 )
 from scrapy.utils.test import assert_aws_environ
+from scrapy.utils.python import to_native_str
 
 
 class FileFeedStorageTest(unittest.TestCase):
@@ -120,8 +120,6 @@ class StdoutFeedStorageTest(unittest.TestCase):
 
 class FeedExportTest(unittest.TestCase):
 
-    skip = not six.PY2
-
     class MyItem(scrapy.Item):
         foo = scrapy.Field()
         egg = scrapy.Field()
@@ -170,7 +168,7 @@ class FeedExportTest(unittest.TestCase):
         settings.update({'FEED_FORMAT': 'csv'})
         data = yield self.exported_data(items, settings)
 
-        reader = csv.DictReader(data.splitlines())
+        reader = csv.DictReader(to_native_str(data).splitlines())
         got_rows = list(reader)
         if ordered:
             self.assertEqual(reader.fieldnames, header)
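Note: exported_data() hands back the feed as bytes, while csv.DictReader wants native strings (bytes on Python 2, text on Python 3), hence the to_native_str() call. A tiny illustration, assuming to_native_str keeps that bytes-to-native-str behaviour:

import csv

from scrapy.utils.python import to_native_str

data = b'foo,egg\r\nbar1,spam1\r\n'        # shape of what exported_data() returns
reader = csv.DictReader(to_native_str(data).splitlines())
assert [dict(row) for row in reader] == [{'foo': 'bar1', 'egg': 'spam1'}]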
@@ -184,14 +182,57 @@ class FeedExportTest(unittest.TestCase):
         settings = settings or {}
         settings.update({'FEED_FORMAT': 'jl'})
         data = yield self.exported_data(items, settings)
-        parsed = [json.loads(line) for line in data.splitlines()]
+        parsed = [json.loads(to_native_str(line)) for line in data.splitlines()]
         rows = [{k: v for k, v in row.items() if v} for row in rows]
         self.assertEqual(rows, parsed)
 
+    @defer.inlineCallbacks
+    def assertExportedXml(self, items, rows, settings=None):
+        settings = settings or {}
+        settings.update({'FEED_FORMAT': 'xml'})
+        data = yield self.exported_data(items, settings)
+        rows = [{k: v for k, v in row.items() if v} for row in rows]
+        import lxml.etree
+        root = lxml.etree.fromstring(data)
+        got_rows = [{e.tag: e.text for e in it} for it in root.findall('item')]
+        self.assertEqual(rows, got_rows)
+
+    def _load_until_eof(self, data, load_func):
+        bytes_output = BytesIO(data)
+        result = []
+        while True:
+            try:
+                result.append(load_func(bytes_output))
+            except EOFError:
+                break
+        return result
+
+    @defer.inlineCallbacks
+    def assertExportedPickle(self, items, rows, settings=None):
+        settings = settings or {}
+        settings.update({'FEED_FORMAT': 'pickle'})
+        data = yield self.exported_data(items, settings)
+        expected = [{k: v for k, v in row.items() if v} for row in rows]
+        import pickle
+        result = self._load_until_eof(data, load_func=pickle.load)
+        self.assertEqual(expected, result)
+
+    @defer.inlineCallbacks
+    def assertExportedMarshal(self, items, rows, settings=None):
+        settings = settings or {}
+        settings.update({'FEED_FORMAT': 'marshal'})
+        data = yield self.exported_data(items, settings)
+        expected = [{k: v for k, v in row.items() if v} for row in rows]
+        import marshal
+        result = self._load_until_eof(data, load_func=marshal.load)
+        self.assertEqual(expected, result)
+
     @defer.inlineCallbacks
     def assertExported(self, items, header, rows, settings=None, ordered=True):
         yield self.assertExportedCsv(items, header, rows, settings, ordered)
         yield self.assertExportedJsonLines(items, rows, settings)
+        yield self.assertExportedXml(items, rows, settings)
+        yield self.assertExportedPickle(items, rows, settings)
 
     @defer.inlineCallbacks
     def test_export_items(self):
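Note: the assertions above exercise the csv, jl, xml, pickle and marshal feed formats end to end via the FEED_FORMAT setting, which is also how a project selects a format. A minimal settings sketch (FEED_URI is the companion setting for the output location in Scrapy of this era; it is not part of this diff and is shown only for context):

custom_settings = {
    'FEED_FORMAT': 'marshal',       # any of: 'csv', 'jl', 'xml', 'pickle', 'marshal'
    'FEED_URI': 'items.marshal',    # assumed companion setting, not changed by this PR
}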