diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst
index 85c73222d..b6139af92 100644
--- a/docs/topics/exporters.rst
+++ b/docs/topics/exporters.rst
@@ -140,7 +140,7 @@ output examples, which assume you're exporting these two items::
BaseItemExporter
----------------
-.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8')
+.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent=None)
This is the (abstract) base class for all Item Exporters. It provides
support for common features used by all (concrete) Item Exporters, such as
@@ -149,7 +149,7 @@ BaseItemExporter
These features can be configured through the constructor arguments which
populate their respective instance attributes: :attr:`fields_to_export`,
- :attr:`export_empty_fields`, :attr:`encoding`.
+ :attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent`.
.. method:: export_item(item)
@@ -216,6 +216,15 @@ BaseItemExporter
encoding). Other value types are passed unchanged to the specific
serialization library.
+ .. attribute:: indent
+
+ Amount of spaces used to indent the output on each level. Defaults to ``None``.
+
+ * ``indent=None`` selects the most compact representation,
+ all items in the same line with no indentation
+ * ``indent<=0`` each item on its own line, no indentation
+ * ``indent>0`` each item on its own line, indented with the provided numeric value
+
.. highlight:: none
XmlItemExporter
diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst
index efdd8c46b..135d05c93 100644
--- a/docs/topics/feed-exports.rst
+++ b/docs/topics/feed-exports.rst
@@ -209,6 +209,7 @@ These are the settings used for configuring the feed exports:
* :setting:`FEED_STORE_EMPTY`
* :setting:`FEED_EXPORT_ENCODING`
* :setting:`FEED_EXPORT_FIELDS`
+ * :setting:`FEED_EXPORT_INDENT`
.. currentmodule:: scrapy.extensions.feedexport
@@ -266,6 +267,22 @@ If an exporter requires a fixed set of fields (this is the case for
is empty or None, then Scrapy tries to infer field names from the
exported data - currently it uses field names from the first item.
+.. setting:: FEED_EXPORT_INDENT
+
+FEED_EXPORT_INDENT
+------------------
+
+Default: ``0``
+
+Amount of spaces used to indent the output on each level. If ``FEED_EXPORT_INDENT``
+is a positive integer, then array elements and object members will be pretty-printed
+with that indent level. An indent level of ``0`` (the default), or negative,
+will put each item on a new line, without indentation. ``None`` selects the most compact representation.
+
+Currently implemented only by :class:`~scrapy.exporters.JsonItemExporter`
+and :class:`~scrapy.exporters.XmlItemExporter`, i.e. when you are exporting
+to ``.json`` or ``.xml``.
+
.. setting:: FEED_STORE_EMPTY
FEED_STORE_EMPTY
diff --git a/scrapy/exporters.py b/scrapy/exporters.py
index c4b1b3476..e2d42b6ab 100644
--- a/scrapy/exporters.py
+++ b/scrapy/exporters.py
@@ -36,6 +36,7 @@ class BaseItemExporter(object):
self.encoding = options.pop('encoding', None)
self.fields_to_export = options.pop('fields_to_export', None)
self.export_empty_fields = options.pop('export_empty_fields', False)
+ self.indent = options.pop('indent', None)
if not dont_fail and options:
raise TypeError("Unexpected options: %s" % ', '.join(options.keys()))
@@ -98,21 +99,33 @@ class JsonItemExporter(BaseItemExporter):
def __init__(self, file, **kwargs):
self._configure(kwargs, dont_fail=True)
self.file = file
+ # there is a small difference between the behaviour of JsonItemExporter.indent
+ # and ScrapyJSONEncoder.indent. ScrapyJSONEncoder.indent=None is needed to prevent
+ # the addition of newlines everywhere
+ json_indent = self.indent if self.indent is not None and self.indent > 0 else None
+ kwargs.setdefault('indent', json_indent)
kwargs.setdefault('ensure_ascii', not self.encoding)
self.encoder = ScrapyJSONEncoder(**kwargs)
self.first_item = True
+ def _beautify_newline(self):
+ if self.indent is not None:
+ self.file.write(b'\n')
+
def start_exporting(self):
- self.file.write(b"[\n")
+ self.file.write(b"[")
+ self._beautify_newline()
def finish_exporting(self):
- self.file.write(b"\n]")
+ self._beautify_newline()
+ self.file.write(b"]")
def export_item(self, item):
if self.first_item:
self.first_item = False
else:
- self.file.write(b',\n')
+ self.file.write(b',')
+ self._beautify_newline()
itemdict = dict(self._get_serialized_fields(item))
data = self.encoder.encode(itemdict)
self.file.write(to_bytes(data, self.encoding))
@@ -128,33 +141,52 @@ class XmlItemExporter(BaseItemExporter):
self.encoding = 'utf-8'
self.xg = XMLGenerator(file, encoding=self.encoding)
+ def _beautify_newline(self, new_item=False):
+ if self.indent is not None and (self.indent > 0 or new_item):
+ self._xg_characters('\n')
+
+ def _beautify_indent(self, depth=1):
+ if self.indent:
+ self._xg_characters(' ' * self.indent * depth)
+
def start_exporting(self):
self.xg.startDocument()
self.xg.startElement(self.root_element, {})
+ self._beautify_newline(new_item=True)
def export_item(self, item):
+ self._beautify_indent(depth=1)
self.xg.startElement(self.item_element, {})
+ self._beautify_newline()
for name, value in self._get_serialized_fields(item, default_value=''):
- self._export_xml_field(name, value)
+ self._export_xml_field(name, value, depth=2)
+ self._beautify_indent(depth=1)
self.xg.endElement(self.item_element)
+ self._beautify_newline(new_item=True)
def finish_exporting(self):
self.xg.endElement(self.root_element)
self.xg.endDocument()
- def _export_xml_field(self, name, serialized_value):
+ def _export_xml_field(self, name, serialized_value, depth):
+ self._beautify_indent(depth=depth)
self.xg.startElement(name, {})
if hasattr(serialized_value, 'items'):
+ self._beautify_newline()
for subname, value in serialized_value.items():
- self._export_xml_field(subname, value)
+ self._export_xml_field(subname, value, depth=depth+1)
+ self._beautify_indent(depth=depth)
elif is_listlike(serialized_value):
+ self._beautify_newline()
for value in serialized_value:
- self._export_xml_field('value', value)
+ self._export_xml_field('value', value, depth=depth+1)
+ self._beautify_indent(depth=depth)
elif isinstance(serialized_value, six.text_type):
self._xg_characters(serialized_value)
else:
self._xg_characters(str(serialized_value))
self.xg.endElement(name)
+ self._beautify_newline()
# Workaround for http://bugs.python.org/issue17606
# Before Python 2.7.4 xml.sax.saxutils required bytes;
diff --git a/scrapy/extensions/feedexport.py b/scrapy/extensions/feedexport.py
index 85d328528..5f133fbde 100644
--- a/scrapy/extensions/feedexport.py
+++ b/scrapy/extensions/feedexport.py
@@ -172,6 +172,9 @@ class FeedExporter(object):
self.store_empty = settings.getbool('FEED_STORE_EMPTY')
self._exporting = False
self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None
+ self.indent = None
+ if settings.get('FEED_EXPORT_INDENT') is not None:
+ self.indent = settings.getint('FEED_EXPORT_INDENT')
uripar = settings['FEED_URI_PARAMS']
self._uripar = load_object(uripar) if uripar else lambda x, y: None
@@ -188,7 +191,7 @@ class FeedExporter(object):
storage = self._get_storage(uri)
file = storage.open(spider)
exporter = self._get_exporter(file, fields_to_export=self.export_fields,
- encoding=self.export_encoding)
+ encoding=self.export_encoding, indent=self.indent)
if self.store_empty:
exporter.start_exporting()
self._exporting = True
diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py
index 854cefc9c..26ff4257e 100644
--- a/scrapy/settings/default_settings.py
+++ b/scrapy/settings/default_settings.py
@@ -161,6 +161,7 @@ FEED_EXPORTERS_BASE = {
'marshal': 'scrapy.exporters.MarshalItemExporter',
'pickle': 'scrapy.exporters.PickleItemExporter',
}
+FEED_EXPORT_INDENT = 0
FILES_STORE_S3_ACL = 'private'
diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py
index 2d137edf4..f55927121 100644
--- a/tests/test_feedexport.py
+++ b/tests/test_feedexport.py
@@ -319,14 +319,14 @@ class FeedExportTest(unittest.TestCase):
@defer.inlineCallbacks
def test_export_no_items_store_empty(self):
formats = (
- ('json', b'[\n\n]'),
+ ('json', b'[]'),
('jsonlines', b''),
('xml', b'\n'),
('csv', b''),
)
for fmt, expctd in formats:
- settings = {'FEED_FORMAT': fmt, 'FEED_STORE_EMPTY': True}
+ settings = {'FEED_FORMAT': fmt, 'FEED_STORE_EMPTY': True, 'FEED_EXPORT_INDENT': None}
data = yield self.exported_no_data(settings)
self.assertEqual(data, expctd)
@@ -425,25 +425,177 @@ class FeedExportTest(unittest.TestCase):
header = ['foo']
formats = {
- 'json': u'[\n{"foo": "Test\\u00d6"}\n]'.encode('utf-8'),
+ 'json': u'[{"foo": "Test\\u00d6"}]'.encode('utf-8'),
'jsonlines': u'{"foo": "Test\\u00d6"}\n'.encode('utf-8'),
'xml': u'\n- Test\xd6
'.encode('utf-8'),
'csv': u'foo\r\nTest\xd6\r\n'.encode('utf-8'),
}
- for format in formats:
- settings = {'FEED_FORMAT': format}
+ for format, expected in formats.items():
+ settings = {'FEED_FORMAT': format, 'FEED_EXPORT_INDENT': None}
data = yield self.exported_data(items, settings)
- self.assertEqual(formats[format], data)
+ self.assertEqual(expected, data)
formats = {
- 'json': u'[\n{"foo": "Test\xd6"}\n]'.encode('latin-1'),
+ 'json': u'[{"foo": "Test\xd6"}]'.encode('latin-1'),
'jsonlines': u'{"foo": "Test\xd6"}\n'.encode('latin-1'),
'xml': u'\n- Test\xd6
'.encode('latin-1'),
'csv': u'foo\r\nTest\xd6\r\n'.encode('latin-1'),
}
- for format in formats:
- settings = {'FEED_FORMAT': format, 'FEED_EXPORT_ENCODING': 'latin-1'}
+ settings = {'FEED_EXPORT_INDENT': None, 'FEED_EXPORT_ENCODING': 'latin-1'}
+ for format, expected in formats.items():
+ settings['FEED_FORMAT'] = format
data = yield self.exported_data(items, settings)
- self.assertEqual(formats[format], data)
+ self.assertEqual(expected, data)
+
+ @defer.inlineCallbacks
+ def test_export_indentation(self):
+ items = [
+ {'foo': ['bar']},
+ {'key': 'value'},
+ ]
+
+ test_cases = [
+ # JSON
+ {
+ 'format': 'json',
+ 'indent': None,
+ 'expected': b'[{"foo": ["bar"]},{"key": "value"}]',
+ },
+ {
+ 'format': 'json',
+ 'indent': -1,
+ 'expected': b"""[
+{"foo": ["bar"]},
+{"key": "value"}
+]""",
+ },
+ {
+ 'format': 'json',
+ 'indent': 0,
+ 'expected': b"""[
+{"foo": ["bar"]},
+{"key": "value"}
+]""",
+ },
+ {
+ 'format': 'json',
+ 'indent': 2,
+ 'expected': b"""[
+{
+ "foo": [
+ "bar"
+ ]
+},
+{
+ "key": "value"
+}
+]""",
+ },
+ {
+ 'format': 'json',
+ 'indent': 4,
+ 'expected': b"""[
+{
+ "foo": [
+ "bar"
+ ]
+},
+{
+ "key": "value"
+}
+]""",
+ },
+ {
+ 'format': 'json',
+ 'indent': 5,
+ 'expected': b"""[
+{
+ "foo": [
+ "bar"
+ ]
+},
+{
+ "key": "value"
+}
+]""",
+ },
+
+ # XML
+ {
+ 'format': 'xml',
+ 'indent': None,
+ 'expected': b"""
+- bar
- value
""",
+ },
+ {
+ 'format': 'xml',
+ 'indent': -1,
+ 'expected': b"""
+
+- bar
+- value
+""",
+ },
+ {
+ 'format': 'xml',
+ 'indent': 0,
+ 'expected': b"""
+
+- bar
+- value
+""",
+ },
+ {
+ 'format': 'xml',
+ 'indent': 2,
+ 'expected': b"""
+
+ -
+
+ bar
+
+
+ -
+ value
+
+""",
+ },
+ {
+ 'format': 'xml',
+ 'indent': 4,
+ 'expected': b"""
+
+ -
+
+ bar
+
+
+ -
+ value
+
+""",
+ },
+ {
+ 'format': 'xml',
+ 'indent': 5,
+ 'expected': b"""
+
+ -
+
+ bar
+
+
+ -
+ value
+
+""",
+ },
+ ]
+
+ for row in test_cases:
+ settings = {'FEED_FORMAT': row['format'], 'FEED_EXPORT_INDENT': row['indent']}
+ data = yield self.exported_data(items, settings)
+ print(row['format'], row['indent'])
+ self.assertEqual(row['expected'], data)