mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 07:24:09 +00:00
Merge pull request #1499 from scrapy/py3-port-exporters
[MRG+1] PY3 exporters
This commit is contained in:
commit
a7b86137d0
@ -3,6 +3,7 @@ Item Exporters are used to export/serialize items into different formats.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
|
import io
|
||||||
import sys
|
import sys
|
||||||
import pprint
|
import pprint
|
||||||
import marshal
|
import marshal
|
||||||
@ -11,7 +12,11 @@ from six.moves import cPickle as pickle
|
|||||||
from xml.sax.saxutils import XMLGenerator
|
from xml.sax.saxutils import XMLGenerator
|
||||||
|
|
||||||
from scrapy.utils.serialize import ScrapyJSONEncoder
|
from scrapy.utils.serialize import ScrapyJSONEncoder
|
||||||
|
from scrapy.utils.python import to_bytes, to_unicode, to_native_str, is_listlike
|
||||||
from scrapy.item import BaseItem
|
from scrapy.item import BaseItem
|
||||||
|
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
|
__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
|
||||||
'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter',
|
'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter',
|
||||||
@ -38,7 +43,7 @@ class BaseItemExporter(object):
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def serialize_field(self, field, name, value):
|
def serialize_field(self, field, name, value):
|
||||||
serializer = field.get('serializer', self._to_str_if_unicode)
|
serializer = field.get('serializer', lambda x: x)
|
||||||
return serializer(value)
|
return serializer(value)
|
||||||
|
|
||||||
def start_exporting(self):
|
def start_exporting(self):
|
||||||
@ -47,9 +52,6 @@ class BaseItemExporter(object):
|
|||||||
def finish_exporting(self):
|
def finish_exporting(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _to_str_if_unicode(self, value):
|
|
||||||
return value.encode(self.encoding) if isinstance(value, unicode) else value
|
|
||||||
|
|
||||||
def _get_serialized_fields(self, item, default_value=None, include_empty=None):
|
def _get_serialized_fields(self, item, default_value=None, include_empty=None):
|
||||||
"""Return the fields to export as an iterable of tuples
|
"""Return the fields to export as an iterable of tuples
|
||||||
(name, serialized_value)
|
(name, serialized_value)
|
||||||
@ -86,10 +88,10 @@ class JsonLinesItemExporter(BaseItemExporter):
|
|||||||
|
|
||||||
def export_item(self, item):
|
def export_item(self, item):
|
||||||
itemdict = dict(self._get_serialized_fields(item))
|
itemdict = dict(self._get_serialized_fields(item))
|
||||||
self.file.write(self.encoder.encode(itemdict) + '\n')
|
self.file.write(to_bytes(self.encoder.encode(itemdict) + '\n'))
|
||||||
|
|
||||||
|
|
||||||
class JsonItemExporter(JsonLinesItemExporter):
|
class JsonItemExporter(BaseItemExporter):
|
||||||
|
|
||||||
def __init__(self, file, **kwargs):
|
def __init__(self, file, **kwargs):
|
||||||
self._configure(kwargs, dont_fail=True)
|
self._configure(kwargs, dont_fail=True)
|
||||||
@ -98,18 +100,18 @@ class JsonItemExporter(JsonLinesItemExporter):
|
|||||||
self.first_item = True
|
self.first_item = True
|
||||||
|
|
||||||
def start_exporting(self):
|
def start_exporting(self):
|
||||||
self.file.write("[")
|
self.file.write(b"[")
|
||||||
|
|
||||||
def finish_exporting(self):
|
def finish_exporting(self):
|
||||||
self.file.write("]")
|
self.file.write(b"]")
|
||||||
|
|
||||||
def export_item(self, item):
|
def export_item(self, item):
|
||||||
if self.first_item:
|
if self.first_item:
|
||||||
self.first_item = False
|
self.first_item = False
|
||||||
else:
|
else:
|
||||||
self.file.write(',\n')
|
self.file.write(b',\n')
|
||||||
itemdict = dict(self._get_serialized_fields(item))
|
itemdict = dict(self._get_serialized_fields(item))
|
||||||
self.file.write(self.encoder.encode(itemdict))
|
self.file.write(to_bytes(self.encoder.encode(itemdict)))
|
||||||
|
|
||||||
|
|
||||||
class XmlItemExporter(BaseItemExporter):
|
class XmlItemExporter(BaseItemExporter):
|
||||||
@ -139,7 +141,7 @@ class XmlItemExporter(BaseItemExporter):
|
|||||||
if hasattr(serialized_value, 'items'):
|
if hasattr(serialized_value, 'items'):
|
||||||
for subname, value in serialized_value.items():
|
for subname, value in serialized_value.items():
|
||||||
self._export_xml_field(subname, value)
|
self._export_xml_field(subname, value)
|
||||||
elif hasattr(serialized_value, '__iter__'):
|
elif is_listlike(serialized_value):
|
||||||
for value in serialized_value:
|
for value in serialized_value:
|
||||||
self._export_xml_field('value', value)
|
self._export_xml_field('value', value)
|
||||||
else:
|
else:
|
||||||
@ -153,10 +155,10 @@ class XmlItemExporter(BaseItemExporter):
|
|||||||
# and Python 3.x will require unicode, so ">= 2.7.4" should be fine.
|
# and Python 3.x will require unicode, so ">= 2.7.4" should be fine.
|
||||||
if sys.version_info[:3] >= (2, 7, 4):
|
if sys.version_info[:3] >= (2, 7, 4):
|
||||||
def _xg_characters(self, serialized_value):
|
def _xg_characters(self, serialized_value):
|
||||||
if not isinstance(serialized_value, unicode):
|
if not isinstance(serialized_value, six.text_type):
|
||||||
serialized_value = serialized_value.decode(self.encoding)
|
serialized_value = serialized_value.decode(self.encoding)
|
||||||
return self.xg.characters(serialized_value)
|
return self.xg.characters(serialized_value)
|
||||||
else:
|
else: # pragma: no cover
|
||||||
def _xg_characters(self, serialized_value):
|
def _xg_characters(self, serialized_value):
|
||||||
return self.xg.characters(serialized_value)
|
return self.xg.characters(serialized_value)
|
||||||
|
|
||||||
@ -166,17 +168,22 @@ class CsvItemExporter(BaseItemExporter):
|
|||||||
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
|
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
|
||||||
self._configure(kwargs, dont_fail=True)
|
self._configure(kwargs, dont_fail=True)
|
||||||
self.include_headers_line = include_headers_line
|
self.include_headers_line = include_headers_line
|
||||||
|
file = file if six.PY2 else io.TextIOWrapper(file, line_buffering=True)
|
||||||
self.csv_writer = csv.writer(file, **kwargs)
|
self.csv_writer = csv.writer(file, **kwargs)
|
||||||
self._headers_not_written = True
|
self._headers_not_written = True
|
||||||
self._join_multivalued = join_multivalued
|
self._join_multivalued = join_multivalued
|
||||||
|
|
||||||
def _to_str_if_unicode(self, value):
|
def serialize_field(self, field, name, value):
|
||||||
|
serializer = field.get('serializer', self._join_if_needed)
|
||||||
|
return serializer(value)
|
||||||
|
|
||||||
|
def _join_if_needed(self, value):
|
||||||
if isinstance(value, (list, tuple)):
|
if isinstance(value, (list, tuple)):
|
||||||
try:
|
try:
|
||||||
value = self._join_multivalued.join(value)
|
return self._join_multivalued.join(value)
|
||||||
except TypeError: # list in value may not contain strings
|
except TypeError: # list in value may not contain strings
|
||||||
pass
|
pass
|
||||||
return super(CsvItemExporter, self)._to_str_if_unicode(value)
|
return value
|
||||||
|
|
||||||
def export_item(self, item):
|
def export_item(self, item):
|
||||||
if self._headers_not_written:
|
if self._headers_not_written:
|
||||||
@ -185,9 +192,16 @@ class CsvItemExporter(BaseItemExporter):
|
|||||||
|
|
||||||
fields = self._get_serialized_fields(item, default_value='',
|
fields = self._get_serialized_fields(item, default_value='',
|
||||||
include_empty=True)
|
include_empty=True)
|
||||||
values = [x[1] for x in fields]
|
values = list(self._build_row(x for _, x in fields))
|
||||||
self.csv_writer.writerow(values)
|
self.csv_writer.writerow(values)
|
||||||
|
|
||||||
|
def _build_row(self, values):
|
||||||
|
for s in values:
|
||||||
|
try:
|
||||||
|
yield to_native_str(s)
|
||||||
|
except TypeError:
|
||||||
|
yield to_native_str(repr(s))
|
||||||
|
|
||||||
def _write_headers_and_set_fields_to_export(self, item):
|
def _write_headers_and_set_fields_to_export(self, item):
|
||||||
if self.include_headers_line:
|
if self.include_headers_line:
|
||||||
if not self.fields_to_export:
|
if not self.fields_to_export:
|
||||||
@ -197,7 +211,8 @@ class CsvItemExporter(BaseItemExporter):
|
|||||||
else:
|
else:
|
||||||
# use fields declared in Item
|
# use fields declared in Item
|
||||||
self.fields_to_export = list(item.fields.keys())
|
self.fields_to_export = list(item.fields.keys())
|
||||||
self.csv_writer.writerow(self.fields_to_export)
|
row = list(self._build_row(self.fields_to_export))
|
||||||
|
self.csv_writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
class PickleItemExporter(BaseItemExporter):
|
class PickleItemExporter(BaseItemExporter):
|
||||||
@ -230,7 +245,7 @@ class PprintItemExporter(BaseItemExporter):
|
|||||||
|
|
||||||
def export_item(self, item):
|
def export_item(self, item):
|
||||||
itemdict = dict(self._get_serialized_fields(item))
|
itemdict = dict(self._get_serialized_fields(item))
|
||||||
self.file.write(pprint.pformat(itemdict) + '\n')
|
self.file.write(to_bytes(pprint.pformat(itemdict) + '\n'))
|
||||||
|
|
||||||
|
|
||||||
class PythonItemExporter(BaseItemExporter):
|
class PythonItemExporter(BaseItemExporter):
|
||||||
@ -239,6 +254,13 @@ class PythonItemExporter(BaseItemExporter):
|
|||||||
json, msgpack, binc, etc) can be used on top of it. Its main goal is to
|
json, msgpack, binc, etc) can be used on top of it. Its main goal is to
|
||||||
seamless support what BaseItemExporter does plus nested items.
|
seamless support what BaseItemExporter does plus nested items.
|
||||||
"""
|
"""
|
||||||
|
def _configure(self, options, dont_fail=False):
|
||||||
|
self.binary = options.pop('binary', True)
|
||||||
|
super(PythonItemExporter, self)._configure(options, dont_fail)
|
||||||
|
if self.binary:
|
||||||
|
warnings.warn(
|
||||||
|
"PythonItemExporter will drop support for binary export in the future",
|
||||||
|
ScrapyDeprecationWarning)
|
||||||
|
|
||||||
def serialize_field(self, field, name, value):
|
def serialize_field(self, field, name, value):
|
||||||
serializer = field.get('serializer', self._serialize_value)
|
serializer = field.get('serializer', self._serialize_value)
|
||||||
@ -249,13 +271,20 @@ class PythonItemExporter(BaseItemExporter):
|
|||||||
return self.export_item(value)
|
return self.export_item(value)
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
return dict(self._serialize_dict(value))
|
return dict(self._serialize_dict(value))
|
||||||
if hasattr(value, '__iter__'):
|
if is_listlike(value):
|
||||||
return [self._serialize_value(v) for v in value]
|
return [self._serialize_value(v) for v in value]
|
||||||
return self._to_str_if_unicode(value)
|
if self.binary:
|
||||||
|
return to_bytes(value, encoding=self.encoding)
|
||||||
|
else:
|
||||||
|
return to_unicode(value, encoding=self.encoding)
|
||||||
|
|
||||||
def _serialize_dict(self, value):
|
def _serialize_dict(self, value):
|
||||||
for key, val in six.iteritems(value):
|
for key, val in six.iteritems(value):
|
||||||
|
key = to_bytes(key) if self.binary else key
|
||||||
yield key, self._serialize_value(val)
|
yield key, self._serialize_value(val)
|
||||||
|
|
||||||
def export_item(self, item):
|
def export_item(self, item):
|
||||||
return dict(self._get_serialized_fields(item))
|
result = dict(self._get_serialized_fields(item))
|
||||||
|
if self.binary:
|
||||||
|
result = dict(self._serialize_dict(result))
|
||||||
|
return result
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
tests/test_exporters.py
|
|
||||||
tests/test_linkextractors_deprecated.py
|
tests/test_linkextractors_deprecated.py
|
||||||
tests/test_proxy_connect.py
|
tests/test_proxy_connect.py
|
||||||
|
|
||||||
|
@ -1,17 +1,21 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import marshal
|
||||||
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from six.moves import cPickle as pickle
|
from six.moves import cPickle as pickle
|
||||||
|
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
|
import six
|
||||||
|
|
||||||
from scrapy.item import Item, Field
|
from scrapy.item import Item, Field
|
||||||
from scrapy.utils.python import to_unicode
|
from scrapy.utils.python import to_unicode
|
||||||
from scrapy.exporters import (
|
from scrapy.exporters import (
|
||||||
BaseItemExporter, PprintItemExporter, PickleItemExporter, CsvItemExporter,
|
BaseItemExporter, PprintItemExporter, PickleItemExporter, CsvItemExporter,
|
||||||
XmlItemExporter, JsonLinesItemExporter, JsonItemExporter, PythonItemExporter
|
XmlItemExporter, JsonLinesItemExporter, JsonItemExporter,
|
||||||
|
PythonItemExporter, MarshalItemExporter
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -23,7 +27,7 @@ class TestItem(Item):
|
|||||||
class BaseItemExporterTest(unittest.TestCase):
|
class BaseItemExporterTest(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.i = TestItem(name=u'John\xa3', age='22')
|
self.i = TestItem(name=u'John\xa3', age=u'22')
|
||||||
self.output = BytesIO()
|
self.output = BytesIO()
|
||||||
self.ie = self._get_exporter()
|
self.ie = self._get_exporter()
|
||||||
|
|
||||||
@ -56,19 +60,19 @@ class BaseItemExporterTest(unittest.TestCase):
|
|||||||
|
|
||||||
def test_serialize_field(self):
|
def test_serialize_field(self):
|
||||||
res = self.ie.serialize_field(self.i.fields['name'], 'name', self.i['name'])
|
res = self.ie.serialize_field(self.i.fields['name'], 'name', self.i['name'])
|
||||||
self.assertEqual(res, 'John\xc2\xa3')
|
self.assertEqual(res, u'John\xa3')
|
||||||
|
|
||||||
res = self.ie.serialize_field(self.i.fields['age'], 'age', self.i['age'])
|
res = self.ie.serialize_field(self.i.fields['age'], 'age', self.i['age'])
|
||||||
self.assertEqual(res, '22')
|
self.assertEqual(res, u'22')
|
||||||
|
|
||||||
def test_fields_to_export(self):
|
def test_fields_to_export(self):
|
||||||
ie = self._get_exporter(fields_to_export=['name'])
|
ie = self._get_exporter(fields_to_export=['name'])
|
||||||
self.assertEqual(list(ie._get_serialized_fields(self.i)), [('name', 'John\xc2\xa3')])
|
self.assertEqual(list(ie._get_serialized_fields(self.i)), [('name', u'John\xa3')])
|
||||||
|
|
||||||
ie = self._get_exporter(fields_to_export=['name'], encoding='latin-1')
|
ie = self._get_exporter(fields_to_export=['name'], encoding='latin-1')
|
||||||
name = list(ie._get_serialized_fields(self.i))[0][1]
|
_, name = list(ie._get_serialized_fields(self.i))[0]
|
||||||
assert isinstance(name, str)
|
assert isinstance(name, six.text_type)
|
||||||
self.assertEqual(name, 'John\xa3')
|
self.assertEqual(name, u'John\xa3')
|
||||||
|
|
||||||
def test_field_custom_serializer(self):
|
def test_field_custom_serializer(self):
|
||||||
def custom_serializer(value):
|
def custom_serializer(value):
|
||||||
@ -78,16 +82,20 @@ class BaseItemExporterTest(unittest.TestCase):
|
|||||||
name = Field()
|
name = Field()
|
||||||
age = Field(serializer=custom_serializer)
|
age = Field(serializer=custom_serializer)
|
||||||
|
|
||||||
i = CustomFieldItem(name=u'John\xa3', age='22')
|
i = CustomFieldItem(name=u'John\xa3', age=u'22')
|
||||||
|
|
||||||
ie = self._get_exporter()
|
ie = self._get_exporter()
|
||||||
self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), 'John\xc2\xa3')
|
self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), u'John\xa3')
|
||||||
self.assertEqual(ie.serialize_field(i.fields['age'], 'age', i['age']), '24')
|
self.assertEqual(ie.serialize_field(i.fields['age'], 'age', i['age']), '24')
|
||||||
|
|
||||||
|
|
||||||
class PythonItemExporterTest(BaseItemExporterTest):
|
class PythonItemExporterTest(BaseItemExporterTest):
|
||||||
def _get_exporter(self, **kwargs):
|
def _get_exporter(self, **kwargs):
|
||||||
return PythonItemExporter(**kwargs)
|
return PythonItemExporter(binary=False, **kwargs)
|
||||||
|
|
||||||
|
def test_invalid_option(self):
|
||||||
|
with self.assertRaisesRegexp(TypeError, "Unexpected options: invalid_option"):
|
||||||
|
PythonItemExporter(invalid_option='something')
|
||||||
|
|
||||||
def test_nested_item(self):
|
def test_nested_item(self):
|
||||||
i1 = TestItem(name=u'Joseph', age='22')
|
i1 = TestItem(name=u'Joseph', age='22')
|
||||||
@ -120,6 +128,12 @@ class PythonItemExporterTest(BaseItemExporterTest):
|
|||||||
self.assertEqual(type(exported['age'][0]), dict)
|
self.assertEqual(type(exported['age'][0]), dict)
|
||||||
self.assertEqual(type(exported['age'][0]['age'][0]), dict)
|
self.assertEqual(type(exported['age'][0]['age'][0]), dict)
|
||||||
|
|
||||||
|
def test_export_binary(self):
|
||||||
|
exporter = PythonItemExporter(binary=True)
|
||||||
|
value = TestItem(name=u'John\xa3', age=u'22')
|
||||||
|
expected = {b'name': b'John\xc2\xa3', b'age': b'22'}
|
||||||
|
self.assertEqual(expected, exporter.export_item(value))
|
||||||
|
|
||||||
|
|
||||||
class PprintItemExporterTest(BaseItemExporterTest):
|
class PprintItemExporterTest(BaseItemExporterTest):
|
||||||
|
|
||||||
@ -152,18 +166,30 @@ class PickleItemExporterTest(BaseItemExporterTest):
|
|||||||
self.assertEqual(pickle.load(f), i2)
|
self.assertEqual(pickle.load(f), i2)
|
||||||
|
|
||||||
|
|
||||||
class CsvItemExporterTest(BaseItemExporterTest):
|
class MarshalItemExporterTest(BaseItemExporterTest):
|
||||||
|
|
||||||
|
def _get_exporter(self, **kwargs):
|
||||||
|
self.output = tempfile.TemporaryFile()
|
||||||
|
return MarshalItemExporter(self.output, **kwargs)
|
||||||
|
|
||||||
|
def _check_output(self):
|
||||||
|
self.output.seek(0)
|
||||||
|
self._assert_expected_item(marshal.load(self.output))
|
||||||
|
|
||||||
|
|
||||||
|
class CsvItemExporterTest(BaseItemExporterTest):
|
||||||
def _get_exporter(self, **kwargs):
|
def _get_exporter(self, **kwargs):
|
||||||
return CsvItemExporter(self.output, **kwargs)
|
return CsvItemExporter(self.output, **kwargs)
|
||||||
|
|
||||||
def assertCsvEqual(self, first, second, msg=None):
|
def assertCsvEqual(self, first, second, msg=None):
|
||||||
|
first = to_unicode(first)
|
||||||
|
second = to_unicode(second)
|
||||||
csvsplit = lambda csv: [sorted(re.split(r'(,|\s+)', line))
|
csvsplit = lambda csv: [sorted(re.split(r'(,|\s+)', line))
|
||||||
for line in csv.splitlines(True)]
|
for line in csv.splitlines(True)]
|
||||||
return self.assertEqual(csvsplit(first), csvsplit(second), msg)
|
return self.assertEqual(csvsplit(first), csvsplit(second), msg)
|
||||||
|
|
||||||
def _check_output(self):
|
def _check_output(self):
|
||||||
self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
|
self.assertCsvEqual(to_unicode(self.output.getvalue()), u'age,name\r\n22,John\xa3\r\n')
|
||||||
|
|
||||||
def assertExportResult(self, item, expected, **kwargs):
|
def assertExportResult(self, item, expected, **kwargs):
|
||||||
fp = BytesIO()
|
fp = BytesIO()
|
||||||
@ -177,13 +203,13 @@ class CsvItemExporterTest(BaseItemExporterTest):
|
|||||||
self.assertExportResult(
|
self.assertExportResult(
|
||||||
item=self.i,
|
item=self.i,
|
||||||
fields_to_export=self.i.fields.keys(),
|
fields_to_export=self.i.fields.keys(),
|
||||||
expected='age,name\r\n22,John\xc2\xa3\r\n',
|
expected=b'age,name\r\n22,John\xc2\xa3\r\n',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_header_export_all_dict(self):
|
def test_header_export_all_dict(self):
|
||||||
self.assertExportResult(
|
self.assertExportResult(
|
||||||
item=dict(self.i),
|
item=dict(self.i),
|
||||||
expected='age,name\r\n22,John\xc2\xa3\r\n',
|
expected=b'age,name\r\n22,John\xc2\xa3\r\n',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_header_export_single_field(self):
|
def test_header_export_single_field(self):
|
||||||
@ -191,7 +217,7 @@ class CsvItemExporterTest(BaseItemExporterTest):
|
|||||||
self.assertExportResult(
|
self.assertExportResult(
|
||||||
item=item,
|
item=item,
|
||||||
fields_to_export=['age'],
|
fields_to_export=['age'],
|
||||||
expected='age\r\n22\r\n',
|
expected=b'age\r\n22\r\n',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_header_export_two_items(self):
|
def test_header_export_two_items(self):
|
||||||
@ -202,14 +228,15 @@ class CsvItemExporterTest(BaseItemExporterTest):
|
|||||||
ie.export_item(item)
|
ie.export_item(item)
|
||||||
ie.export_item(item)
|
ie.export_item(item)
|
||||||
ie.finish_exporting()
|
ie.finish_exporting()
|
||||||
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
|
self.assertCsvEqual(output.getvalue(),
|
||||||
|
b'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
|
||||||
|
|
||||||
def test_header_no_header_line(self):
|
def test_header_no_header_line(self):
|
||||||
for item in [self.i, dict(self.i)]:
|
for item in [self.i, dict(self.i)]:
|
||||||
self.assertExportResult(
|
self.assertExportResult(
|
||||||
item=item,
|
item=item,
|
||||||
include_headers_line=False,
|
include_headers_line=False,
|
||||||
expected='22,John\xc2\xa3\r\n',
|
expected=b'22,John\xc2\xa3\r\n',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_join_multivalue(self):
|
def test_join_multivalue(self):
|
||||||
@ -224,6 +251,13 @@ class CsvItemExporterTest(BaseItemExporterTest):
|
|||||||
expected='"Mary,Paul",John\r\n',
|
expected='"Mary,Paul",John\r\n',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_join_multivalue_not_strings(self):
|
||||||
|
self.assertExportResult(
|
||||||
|
item=dict(name='John', friends=[4, 8]),
|
||||||
|
include_headers_line=False,
|
||||||
|
expected='"[4, 8]",John\r\n',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class XmlItemExporterTest(BaseItemExporterTest):
|
class XmlItemExporterTest(BaseItemExporterTest):
|
||||||
|
|
||||||
@ -252,13 +286,13 @@ class XmlItemExporterTest(BaseItemExporterTest):
|
|||||||
self.assertXmlEquivalent(fp.getvalue(), expected_value)
|
self.assertXmlEquivalent(fp.getvalue(), expected_value)
|
||||||
|
|
||||||
def _check_output(self):
|
def _check_output(self):
|
||||||
expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
|
expected_value = b'<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
|
||||||
self.assertXmlEquivalent(self.output.getvalue(), expected_value)
|
self.assertXmlEquivalent(self.output.getvalue(), expected_value)
|
||||||
|
|
||||||
def test_multivalued_fields(self):
|
def test_multivalued_fields(self):
|
||||||
self.assertExportResult(
|
self.assertExportResult(
|
||||||
TestItem(name=[u'John\xa3', u'Doe']),
|
TestItem(name=[u'John\xa3', u'Doe']),
|
||||||
'<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
|
b'<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_nested_item(self):
|
def test_nested_item(self):
|
||||||
@ -267,19 +301,19 @@ class XmlItemExporterTest(BaseItemExporterTest):
|
|||||||
i3 = TestItem(name=u'buz', age=i2)
|
i3 = TestItem(name=u'buz', age=i2)
|
||||||
|
|
||||||
self.assertExportResult(i3,
|
self.assertExportResult(i3,
|
||||||
'<?xml version="1.0" encoding="utf-8"?>\n'
|
b'<?xml version="1.0" encoding="utf-8"?>\n'
|
||||||
'<items>'
|
b'<items>'
|
||||||
'<item>'
|
b'<item>'
|
||||||
'<age>'
|
b'<age>'
|
||||||
'<age>'
|
b'<age>'
|
||||||
'<age>22</age>'
|
b'<age>22</age>'
|
||||||
'<name>foo\xc2\xa3hoo</name>'
|
b'<name>foo\xc2\xa3hoo</name>'
|
||||||
'</age>'
|
b'</age>'
|
||||||
'<name>bar</name>'
|
b'<name>bar</name>'
|
||||||
'</age>'
|
b'</age>'
|
||||||
'<name>buz</name>'
|
b'<name>buz</name>'
|
||||||
'</item>'
|
b'</item>'
|
||||||
'</items>'
|
b'</items>'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_nested_list_item(self):
|
def test_nested_list_item(self):
|
||||||
@ -288,16 +322,16 @@ class XmlItemExporterTest(BaseItemExporterTest):
|
|||||||
i3 = TestItem(name=u'buz', age=[i1, i2])
|
i3 = TestItem(name=u'buz', age=[i1, i2])
|
||||||
|
|
||||||
self.assertExportResult(i3,
|
self.assertExportResult(i3,
|
||||||
'<?xml version="1.0" encoding="utf-8"?>\n'
|
b'<?xml version="1.0" encoding="utf-8"?>\n'
|
||||||
'<items>'
|
b'<items>'
|
||||||
'<item>'
|
b'<item>'
|
||||||
'<age>'
|
b'<age>'
|
||||||
'<value><name>foo</name></value>'
|
b'<value><name>foo</name></value>'
|
||||||
'<value><name>bar</name><v2><egg><value>spam</value></egg></v2></value>'
|
b'<value><name>bar</name><v2><egg><value>spam</value></egg></v2></value>'
|
||||||
'</age>'
|
b'</age>'
|
||||||
'<name>buz</name>'
|
b'<name>buz</name>'
|
||||||
'</item>'
|
b'</item>'
|
||||||
'</items>'
|
b'</items>'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -309,7 +343,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
|
|||||||
return JsonLinesItemExporter(self.output, **kwargs)
|
return JsonLinesItemExporter(self.output, **kwargs)
|
||||||
|
|
||||||
def _check_output(self):
|
def _check_output(self):
|
||||||
exported = json.loads(self.output.getvalue().strip())
|
exported = json.loads(to_unicode(self.output.getvalue().strip()))
|
||||||
self.assertEqual(exported, dict(self.i))
|
self.assertEqual(exported, dict(self.i))
|
||||||
|
|
||||||
def test_nested_item(self):
|
def test_nested_item(self):
|
||||||
@ -319,7 +353,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
|
|||||||
self.ie.start_exporting()
|
self.ie.start_exporting()
|
||||||
self.ie.export_item(i3)
|
self.ie.export_item(i3)
|
||||||
self.ie.finish_exporting()
|
self.ie.finish_exporting()
|
||||||
exported = json.loads(self.output.getvalue())
|
exported = json.loads(to_unicode(self.output.getvalue()))
|
||||||
self.assertEqual(exported, self._expected_nested)
|
self.assertEqual(exported, self._expected_nested)
|
||||||
|
|
||||||
def test_extra_keywords(self):
|
def test_extra_keywords(self):
|
||||||
@ -337,7 +371,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
|
|||||||
return JsonItemExporter(self.output, **kwargs)
|
return JsonItemExporter(self.output, **kwargs)
|
||||||
|
|
||||||
def _check_output(self):
|
def _check_output(self):
|
||||||
exported = json.loads(self.output.getvalue().strip())
|
exported = json.loads(to_unicode(self.output.getvalue().strip()))
|
||||||
self.assertEqual(exported, [dict(self.i)])
|
self.assertEqual(exported, [dict(self.i)])
|
||||||
|
|
||||||
def assertTwoItemsExported(self, item):
|
def assertTwoItemsExported(self, item):
|
||||||
@ -345,7 +379,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
|
|||||||
self.ie.export_item(item)
|
self.ie.export_item(item)
|
||||||
self.ie.export_item(item)
|
self.ie.export_item(item)
|
||||||
self.ie.finish_exporting()
|
self.ie.finish_exporting()
|
||||||
exported = json.loads(self.output.getvalue())
|
exported = json.loads(to_unicode(self.output.getvalue()))
|
||||||
self.assertEqual(exported, [dict(item), dict(item)])
|
self.assertEqual(exported, [dict(item), dict(item)])
|
||||||
|
|
||||||
def test_two_items(self):
|
def test_two_items(self):
|
||||||
@ -361,7 +395,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
|
|||||||
self.ie.start_exporting()
|
self.ie.start_exporting()
|
||||||
self.ie.export_item(i3)
|
self.ie.export_item(i3)
|
||||||
self.ie.finish_exporting()
|
self.ie.finish_exporting()
|
||||||
exported = json.loads(self.output.getvalue())
|
exported = json.loads(to_unicode(self.output.getvalue()))
|
||||||
expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': dict(i1)}}
|
expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': dict(i1)}}
|
||||||
self.assertEqual(exported, [expected])
|
self.assertEqual(exported, [expected])
|
||||||
|
|
||||||
@ -372,7 +406,7 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
|
|||||||
self.ie.start_exporting()
|
self.ie.start_exporting()
|
||||||
self.ie.export_item(i3)
|
self.ie.export_item(i3)
|
||||||
self.ie.finish_exporting()
|
self.ie.finish_exporting()
|
||||||
exported = json.loads(self.output.getvalue())
|
exported = json.loads(to_unicode(self.output.getvalue()))
|
||||||
expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': i1}}
|
expected = {'name': u'Jesus', 'age': {'name': 'Maria', 'age': i1}}
|
||||||
self.assertEqual(exported, [expected])
|
self.assertEqual(exported, [expected])
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@ import json
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
import six
|
|
||||||
from six.moves.urllib.parse import urlparse
|
from six.moves.urllib.parse import urlparse
|
||||||
|
|
||||||
from zope.interface.verify import verifyObject
|
from zope.interface.verify import verifyObject
|
||||||
@ -22,6 +21,7 @@ from scrapy.extensions.feedexport import (
|
|||||||
S3FeedStorage, StdoutFeedStorage
|
S3FeedStorage, StdoutFeedStorage
|
||||||
)
|
)
|
||||||
from scrapy.utils.test import assert_aws_environ
|
from scrapy.utils.test import assert_aws_environ
|
||||||
|
from scrapy.utils.python import to_native_str
|
||||||
|
|
||||||
|
|
||||||
class FileFeedStorageTest(unittest.TestCase):
|
class FileFeedStorageTest(unittest.TestCase):
|
||||||
@ -120,8 +120,6 @@ class StdoutFeedStorageTest(unittest.TestCase):
|
|||||||
|
|
||||||
class FeedExportTest(unittest.TestCase):
|
class FeedExportTest(unittest.TestCase):
|
||||||
|
|
||||||
skip = not six.PY2
|
|
||||||
|
|
||||||
class MyItem(scrapy.Item):
|
class MyItem(scrapy.Item):
|
||||||
foo = scrapy.Field()
|
foo = scrapy.Field()
|
||||||
egg = scrapy.Field()
|
egg = scrapy.Field()
|
||||||
@ -170,7 +168,7 @@ class FeedExportTest(unittest.TestCase):
|
|||||||
settings.update({'FEED_FORMAT': 'csv'})
|
settings.update({'FEED_FORMAT': 'csv'})
|
||||||
data = yield self.exported_data(items, settings)
|
data = yield self.exported_data(items, settings)
|
||||||
|
|
||||||
reader = csv.DictReader(data.splitlines())
|
reader = csv.DictReader(to_native_str(data).splitlines())
|
||||||
got_rows = list(reader)
|
got_rows = list(reader)
|
||||||
if ordered:
|
if ordered:
|
||||||
self.assertEqual(reader.fieldnames, header)
|
self.assertEqual(reader.fieldnames, header)
|
||||||
@ -184,14 +182,57 @@ class FeedExportTest(unittest.TestCase):
|
|||||||
settings = settings or {}
|
settings = settings or {}
|
||||||
settings.update({'FEED_FORMAT': 'jl'})
|
settings.update({'FEED_FORMAT': 'jl'})
|
||||||
data = yield self.exported_data(items, settings)
|
data = yield self.exported_data(items, settings)
|
||||||
parsed = [json.loads(line) for line in data.splitlines()]
|
parsed = [json.loads(to_native_str(line)) for line in data.splitlines()]
|
||||||
rows = [{k: v for k, v in row.items() if v} for row in rows]
|
rows = [{k: v for k, v in row.items() if v} for row in rows]
|
||||||
self.assertEqual(rows, parsed)
|
self.assertEqual(rows, parsed)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def assertExportedXml(self, items, rows, settings=None):
|
||||||
|
settings = settings or {}
|
||||||
|
settings.update({'FEED_FORMAT': 'xml'})
|
||||||
|
data = yield self.exported_data(items, settings)
|
||||||
|
rows = [{k: v for k, v in row.items() if v} for row in rows]
|
||||||
|
import lxml.etree
|
||||||
|
root = lxml.etree.fromstring(data)
|
||||||
|
got_rows = [{e.tag: e.text for e in it} for it in root.findall('item')]
|
||||||
|
self.assertEqual(rows, got_rows)
|
||||||
|
|
||||||
|
def _load_until_eof(self, data, load_func):
|
||||||
|
bytes_output = BytesIO(data)
|
||||||
|
result = []
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
result.append(load_func(bytes_output))
|
||||||
|
except EOFError:
|
||||||
|
break
|
||||||
|
return result
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def assertExportedPickle(self, items, rows, settings=None):
|
||||||
|
settings = settings or {}
|
||||||
|
settings.update({'FEED_FORMAT': 'pickle'})
|
||||||
|
data = yield self.exported_data(items, settings)
|
||||||
|
expected = [{k: v for k, v in row.items() if v} for row in rows]
|
||||||
|
import pickle
|
||||||
|
result = self._load_until_eof(data, load_func=pickle.load)
|
||||||
|
self.assertEqual(expected, result)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def assertExportedMarshal(self, items, rows, settings=None):
|
||||||
|
settings = settings or {}
|
||||||
|
settings.update({'FEED_FORMAT': 'marshal'})
|
||||||
|
data = yield self.exported_data(items, settings)
|
||||||
|
expected = [{k: v for k, v in row.items() if v} for row in rows]
|
||||||
|
import marshal
|
||||||
|
result = self._load_until_eof(data, load_func=marshal.load)
|
||||||
|
self.assertEqual(expected, result)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def assertExported(self, items, header, rows, settings=None, ordered=True):
|
def assertExported(self, items, header, rows, settings=None, ordered=True):
|
||||||
yield self.assertExportedCsv(items, header, rows, settings, ordered)
|
yield self.assertExportedCsv(items, header, rows, settings, ordered)
|
||||||
yield self.assertExportedJsonLines(items, rows, settings)
|
yield self.assertExportedJsonLines(items, rows, settings)
|
||||||
|
yield self.assertExportedXml(items, rows, settings)
|
||||||
|
yield self.assertExportedPickle(items, rows, settings)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def test_export_items(self):
|
def test_export_items(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user