From 7e9153b38d7ef70f1f19a82506b669433d134b01 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 19 Dec 2016 10:43:04 -0300 Subject: [PATCH 1/6] Feed exports: beautify JSON and XML --- docs/topics/exporters.rst | 9 ++- docs/topics/feed-exports.rst | 14 ++++ scrapy/exporters.py | 30 ++++++-- scrapy/extensions/feedexport.py | 3 +- scrapy/settings/default_settings.py | 1 + tests/test_feedexport.py | 113 ++++++++++++++++++++++++++-- 6 files changed, 156 insertions(+), 14 deletions(-) diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst index 85c73222d..4114eda58 100644 --- a/docs/topics/exporters.rst +++ b/docs/topics/exporters.rst @@ -140,7 +140,7 @@ output examples, which assume you're exporting these two items:: BaseItemExporter ---------------- -.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8') +.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent_width=None) This is the (abstract) base class for all Item Exporters. It provides support for common features used by all (concrete) Item Exporters, such as @@ -149,7 +149,7 @@ BaseItemExporter These features can be configured through the constructor arguments which populate their respective instance attributes: :attr:`fields_to_export`, - :attr:`export_empty_fields`, :attr:`encoding`. + :attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent_width`. .. method:: export_item(item) @@ -216,6 +216,11 @@ BaseItemExporter encoding). Other value types are passed unchanged to the specific serialization library. + .. attribute:: indent_width + + Amount of spaces used to indent the output on each level. + Defaults to ``None``, which disables indentation. + .. highlight:: none XmlItemExporter diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst index efdd8c46b..ce3b5fd75 100644 --- a/docs/topics/feed-exports.rst +++ b/docs/topics/feed-exports.rst @@ -209,6 +209,7 @@ These are the settings used for configuring the feed exports: * :setting:`FEED_STORE_EMPTY` * :setting:`FEED_EXPORT_ENCODING` * :setting:`FEED_EXPORT_FIELDS` + * :setting:`FEED_EXPORT_INDENT_WIDTH` .. currentmodule:: scrapy.extensions.feedexport @@ -266,6 +267,19 @@ If an exporter requires a fixed set of fields (this is the case for is empty or None, then Scrapy tries to infer field names from the exported data - currently it uses field names from the first item. +.. setting:: FEED_EXPORT_INDENT_WIDTH + +FEED_EXPORT_INDENT_WIDTH +------------------------ + +Default: ``None`` + +Amount of spaces to indent on each level. +Set to `None` to disable indentation. + +Currently used by :class:`~scrapy.exporters.JsonItemExporter` +and :class:`~scrapy.exporters.XmlItemExporter` + .. setting:: FEED_STORE_EMPTY FEED_STORE_EMPTY diff --git a/scrapy/exporters.py b/scrapy/exporters.py index c4b1b3476..69e6c15e0 100644 --- a/scrapy/exporters.py +++ b/scrapy/exporters.py @@ -36,6 +36,7 @@ class BaseItemExporter(object): self.encoding = options.pop('encoding', None) self.fields_to_export = options.pop('fields_to_export', None) self.export_empty_fields = options.pop('export_empty_fields', False) + self.indent_width = options.pop('indent_width', None) if not dont_fail and options: raise TypeError("Unexpected options: %s" % ', '.join(options.keys())) @@ -99,7 +100,7 @@ class JsonItemExporter(BaseItemExporter): self._configure(kwargs, dont_fail=True) self.file = file kwargs.setdefault('ensure_ascii', not self.encoding) - self.encoder = ScrapyJSONEncoder(**kwargs) + self.encoder = ScrapyJSONEncoder(indent=self.indent_width, **kwargs) self.first_item = True def start_exporting(self): @@ -128,33 +129,52 @@ class XmlItemExporter(BaseItemExporter): self.encoding = 'utf-8' self.xg = XMLGenerator(file, encoding=self.encoding) + def _beautify_newline(self): + if self.indent_width: + self._xg_characters('\n') + + def _beautify_indent(self, depth=1): + if self.indent_width: + self._xg_characters(' ' * self.indent_width * depth) + def start_exporting(self): self.xg.startDocument() self.xg.startElement(self.root_element, {}) + self._beautify_newline() def export_item(self, item): + self._beautify_indent(depth=1) self.xg.startElement(self.item_element, {}) + self._beautify_newline() for name, value in self._get_serialized_fields(item, default_value=''): - self._export_xml_field(name, value) + self._export_xml_field(name, value, depth=2) + self._beautify_indent(depth=1) self.xg.endElement(self.item_element) + self._beautify_newline() def finish_exporting(self): self.xg.endElement(self.root_element) self.xg.endDocument() - def _export_xml_field(self, name, serialized_value): + def _export_xml_field(self, name, serialized_value, depth): + self._beautify_indent(depth=depth) self.xg.startElement(name, {}) if hasattr(serialized_value, 'items'): + self._beautify_newline() for subname, value in serialized_value.items(): - self._export_xml_field(subname, value) + self._export_xml_field(subname, value, depth=depth+1) + self._beautify_indent(depth=depth) elif is_listlike(serialized_value): + self._beautify_newline() for value in serialized_value: - self._export_xml_field('value', value) + self._export_xml_field('value', value, depth=depth+1) + self._beautify_indent(depth=depth) elif isinstance(serialized_value, six.text_type): self._xg_characters(serialized_value) else: self._xg_characters(str(serialized_value)) self.xg.endElement(name) + self._beautify_newline() # Workaround for http://bugs.python.org/issue17606 # Before Python 2.7.4 xml.sax.saxutils required bytes; diff --git a/scrapy/extensions/feedexport.py b/scrapy/extensions/feedexport.py index 85d328528..26024e5e9 100644 --- a/scrapy/extensions/feedexport.py +++ b/scrapy/extensions/feedexport.py @@ -172,6 +172,7 @@ class FeedExporter(object): self.store_empty = settings.getbool('FEED_STORE_EMPTY') self._exporting = False self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None + self.indent_width = settings.getint('FEED_EXPORT_INDENT_WIDTH') or None uripar = settings['FEED_URI_PARAMS'] self._uripar = load_object(uripar) if uripar else lambda x, y: None @@ -188,7 +189,7 @@ class FeedExporter(object): storage = self._get_storage(uri) file = storage.open(spider) exporter = self._get_exporter(file, fields_to_export=self.export_fields, - encoding=self.export_encoding) + encoding=self.export_encoding, indent_width=self.indent_width) if self.store_empty: exporter.start_exporting() self._exporting = True diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index d73c595d2..cca0d3889 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -161,6 +161,7 @@ FEED_EXPORTERS_BASE = { 'marshal': 'scrapy.exporters.MarshalItemExporter', 'pickle': 'scrapy.exporters.PickleItemExporter', } +FEED_EXPORT_INDENT_WIDTH = None FILES_STORE_S3_ACL = 'private' diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index 2d137edf4..bf002bec7 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -431,10 +431,10 @@ class FeedExportTest(unittest.TestCase): 'csv': u'foo\r\nTest\xd6\r\n'.encode('utf-8'), } - for format in formats: - settings = {'FEED_FORMAT': format} + for format, expected in formats.items(): + settings = {'FEED_FORMAT': format, 'FEED_EXPORT_INDENT_WIDTH': None} data = yield self.exported_data(items, settings) - self.assertEqual(formats[format], data) + self.assertEqual(expected, data) formats = { 'json': u'[\n{"foo": "Test\xd6"}\n]'.encode('latin-1'), @@ -443,7 +443,108 @@ class FeedExportTest(unittest.TestCase): 'csv': u'foo\r\nTest\xd6\r\n'.encode('latin-1'), } - for format in formats: - settings = {'FEED_FORMAT': format, 'FEED_EXPORT_ENCODING': 'latin-1'} + settings = {'FEED_EXPORT_INDENT_WIDTH': None, 'FEED_EXPORT_ENCODING': 'latin-1'} + for format, expected in formats.items(): + settings['FEED_FORMAT'] = format data = yield self.exported_data(items, settings) - self.assertEqual(formats[format], data) + self.assertEqual(expected, data) + + @defer.inlineCallbacks + def test_export_indentation(self): + items = [dict({'foo': ['bar']})] + + output = [ + # JSON + { + 'format': 'json', + 'indent_width': None, + 'expected': b'[\n{"foo": ["bar"]}\n]', + }, + { + 'format': 'json', + 'indent_width': 2, + 'expected': b""" +[ +{ + "foo": [ + "bar" + ] +} +]""", + }, + { + 'format': 'json', + 'indent_width': 4, + 'expected': b""" +[ +{ + "foo": [ + "bar" + ] +} +]""", + }, + { + 'format': 'json', + 'indent_width': 5, + 'expected': b""" +[ +{ + "foo": [ + "bar" + ] +} +]""", + }, + + # XML + { + 'format': 'xml', + 'indent_width': None, + 'expected': b'\nbar', + }, + { + 'format': 'xml', + 'indent_width': 2, + 'expected': b""" + + + + + bar + + +""", + }, + { + 'format': 'xml', + 'indent_width': 4, + 'expected': b""" + + + + + bar + + +""", + }, + { + 'format': 'xml', + 'indent_width': 5, + 'expected': b""" + + + + + bar + + +""", + }, + ] + + for row in output: + settings = {'FEED_FORMAT': row['format'], 'FEED_EXPORT_INDENT_WIDTH': row['indent_width']} + data = yield self.exported_data(items, settings) + self.assertEqual(row['expected'].strip(), data) From 766b2c84539d58ee871e1f301df1ad0ae0d44079 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Thu, 23 Feb 2017 10:21:33 -0300 Subject: [PATCH 2/6] Feed exports: enforce difference between None and 0 on indent Also rename params and settings from "indent_width" to just "indent" --- docs/topics/exporters.rst | 14 ++-- docs/topics/feed-exports.rst | 14 ++-- scrapy/exporters.py | 24 ++++-- scrapy/extensions/feedexport.py | 6 +- scrapy/settings/default_settings.py | 2 +- tests/test_feedexport.py | 116 ++++++++++++++++++++++++---- 6 files changed, 137 insertions(+), 39 deletions(-) diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst index 4114eda58..ad559fb35 100644 --- a/docs/topics/exporters.rst +++ b/docs/topics/exporters.rst @@ -140,7 +140,7 @@ output examples, which assume you're exporting these two items:: BaseItemExporter ---------------- -.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent_width=None) +.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent=None) This is the (abstract) base class for all Item Exporters. It provides support for common features used by all (concrete) Item Exporters, such as @@ -149,7 +149,7 @@ BaseItemExporter These features can be configured through the constructor arguments which populate their respective instance attributes: :attr:`fields_to_export`, - :attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent_width`. + :attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent`. .. method:: export_item(item) @@ -216,10 +216,14 @@ BaseItemExporter encoding). Other value types are passed unchanged to the specific serialization library. - .. attribute:: indent_width + .. attribute:: indent - Amount of spaces used to indent the output on each level. - Defaults to ``None``, which disables indentation. + Amount of spaces used to indent the output on each level. Defaults to ``None``, + which disables indentation. This argument behaves like ``indent`` in python's + JSON module (both for JSON and XML exporters): "If ``indent`` is a non-negative + integer, then array elements and object members will be pretty-printed with that + indent level. An indent level of 0, or negative, will only insert newlines. + ``None`` (the default) selects the most compact representation" .. highlight:: none diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst index ce3b5fd75..afaa972e5 100644 --- a/docs/topics/feed-exports.rst +++ b/docs/topics/feed-exports.rst @@ -209,7 +209,7 @@ These are the settings used for configuring the feed exports: * :setting:`FEED_STORE_EMPTY` * :setting:`FEED_EXPORT_ENCODING` * :setting:`FEED_EXPORT_FIELDS` - * :setting:`FEED_EXPORT_INDENT_WIDTH` + * :setting:`FEED_EXPORT_INDENT` .. currentmodule:: scrapy.extensions.feedexport @@ -267,15 +267,17 @@ If an exporter requires a fixed set of fields (this is the case for is empty or None, then Scrapy tries to infer field names from the exported data - currently it uses field names from the first item. -.. setting:: FEED_EXPORT_INDENT_WIDTH +.. setting:: FEED_EXPORT_INDENT -FEED_EXPORT_INDENT_WIDTH ------------------------- +FEED_EXPORT_INDENT +------------------ Default: ``None`` -Amount of spaces to indent on each level. -Set to `None` to disable indentation. +Amount of spaces used to indent the output on each level. If ``FEED_EXPORT_INDENT`` +is a non-negative integer, then array elements and object members will be pretty-printed +with that indent level. An indent level of 0, or negative, will only insert newlines. +``None`` (the default) selects the most compact representation Currently used by :class:`~scrapy.exporters.JsonItemExporter` and :class:`~scrapy.exporters.XmlItemExporter` diff --git a/scrapy/exporters.py b/scrapy/exporters.py index 69e6c15e0..1dfa2af85 100644 --- a/scrapy/exporters.py +++ b/scrapy/exporters.py @@ -36,7 +36,7 @@ class BaseItemExporter(object): self.encoding = options.pop('encoding', None) self.fields_to_export = options.pop('fields_to_export', None) self.export_empty_fields = options.pop('export_empty_fields', False) - self.indent_width = options.pop('indent_width', None) + self.indent = options.pop('indent', None) if not dont_fail and options: raise TypeError("Unexpected options: %s" % ', '.join(options.keys())) @@ -100,20 +100,28 @@ class JsonItemExporter(BaseItemExporter): self._configure(kwargs, dont_fail=True) self.file = file kwargs.setdefault('ensure_ascii', not self.encoding) - self.encoder = ScrapyJSONEncoder(indent=self.indent_width, **kwargs) + kwargs.setdefault('indent', self.indent) + self.encoder = ScrapyJSONEncoder(**kwargs) self.first_item = True + def _beautify_newline(self): + if self.indent is not None: + self.file.write(b'\n') + def start_exporting(self): - self.file.write(b"[\n") + self.file.write(b"[") + self._beautify_newline() def finish_exporting(self): - self.file.write(b"\n]") + self._beautify_newline() + self.file.write(b"]") def export_item(self, item): if self.first_item: self.first_item = False else: - self.file.write(b',\n') + self.file.write(b',') + self._beautify_newline() itemdict = dict(self._get_serialized_fields(item)) data = self.encoder.encode(itemdict) self.file.write(to_bytes(data, self.encoding)) @@ -130,12 +138,12 @@ class XmlItemExporter(BaseItemExporter): self.xg = XMLGenerator(file, encoding=self.encoding) def _beautify_newline(self): - if self.indent_width: + if self.indent is not None: self._xg_characters('\n') def _beautify_indent(self, depth=1): - if self.indent_width: - self._xg_characters(' ' * self.indent_width * depth) + if self.indent: + self._xg_characters(' ' * self.indent * depth) def start_exporting(self): self.xg.startDocument() diff --git a/scrapy/extensions/feedexport.py b/scrapy/extensions/feedexport.py index 26024e5e9..5f133fbde 100644 --- a/scrapy/extensions/feedexport.py +++ b/scrapy/extensions/feedexport.py @@ -172,7 +172,9 @@ class FeedExporter(object): self.store_empty = settings.getbool('FEED_STORE_EMPTY') self._exporting = False self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None - self.indent_width = settings.getint('FEED_EXPORT_INDENT_WIDTH') or None + self.indent = None + if settings.get('FEED_EXPORT_INDENT') is not None: + self.indent = settings.getint('FEED_EXPORT_INDENT') uripar = settings['FEED_URI_PARAMS'] self._uripar = load_object(uripar) if uripar else lambda x, y: None @@ -189,7 +191,7 @@ class FeedExporter(object): storage = self._get_storage(uri) file = storage.open(spider) exporter = self._get_exporter(file, fields_to_export=self.export_fields, - encoding=self.export_encoding, indent_width=self.indent_width) + encoding=self.export_encoding, indent=self.indent) if self.store_empty: exporter.start_exporting() self._exporting = True diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index cca0d3889..fc265e2ba 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -161,7 +161,7 @@ FEED_EXPORTERS_BASE = { 'marshal': 'scrapy.exporters.MarshalItemExporter', 'pickle': 'scrapy.exporters.PickleItemExporter', } -FEED_EXPORT_INDENT_WIDTH = None +FEED_EXPORT_INDENT = None FILES_STORE_S3_ACL = 'private' diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index bf002bec7..2b82bba0c 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -319,7 +319,7 @@ class FeedExportTest(unittest.TestCase): @defer.inlineCallbacks def test_export_no_items_store_empty(self): formats = ( - ('json', b'[\n\n]'), + ('json', b'[]'), ('jsonlines', b''), ('xml', b'\n'), ('csv', b''), @@ -425,25 +425,25 @@ class FeedExportTest(unittest.TestCase): header = ['foo'] formats = { - 'json': u'[\n{"foo": "Test\\u00d6"}\n]'.encode('utf-8'), + 'json': u'[{"foo": "Test\\u00d6"}]'.encode('utf-8'), 'jsonlines': u'{"foo": "Test\\u00d6"}\n'.encode('utf-8'), 'xml': u'\nTest\xd6'.encode('utf-8'), 'csv': u'foo\r\nTest\xd6\r\n'.encode('utf-8'), } for format, expected in formats.items(): - settings = {'FEED_FORMAT': format, 'FEED_EXPORT_INDENT_WIDTH': None} + settings = {'FEED_FORMAT': format, 'FEED_EXPORT_INDENT': None} data = yield self.exported_data(items, settings) self.assertEqual(expected, data) formats = { - 'json': u'[\n{"foo": "Test\xd6"}\n]'.encode('latin-1'), + 'json': u'[{"foo": "Test\xd6"}]'.encode('latin-1'), 'jsonlines': u'{"foo": "Test\xd6"}\n'.encode('latin-1'), 'xml': u'\nTest\xd6'.encode('latin-1'), 'csv': u'foo\r\nTest\xd6\r\n'.encode('latin-1'), } - settings = {'FEED_EXPORT_INDENT_WIDTH': None, 'FEED_EXPORT_ENCODING': 'latin-1'} + settings = {'FEED_EXPORT_INDENT': None, 'FEED_EXPORT_ENCODING': 'latin-1'} for format, expected in formats.items(): settings['FEED_FORMAT'] = format data = yield self.exported_data(items, settings) @@ -451,48 +451,89 @@ class FeedExportTest(unittest.TestCase): @defer.inlineCallbacks def test_export_indentation(self): - items = [dict({'foo': ['bar']})] + items = [dict({'foo': ['bar']}), dict({'key': 'value'})] output = [ # JSON { 'format': 'json', - 'indent_width': None, - 'expected': b'[\n{"foo": ["bar"]}\n]', + 'indent': None, + 'expected': b'[{"foo": ["bar"]},{"key": "value"}]', }, { 'format': 'json', - 'indent_width': 2, + 'indent': -1, + 'expected': b""" +[ +{ +"foo": [ +"bar" +] +}, +{ +"key": "value" +} +] +""", + }, + { + 'format': 'json', + 'indent': 0, + 'expected': b""" +[ +{ +"foo": [ +"bar" +] +}, +{ +"key": "value" +} +] +""", + }, + { + 'format': 'json', + 'indent': 2, 'expected': b""" [ { "foo": [ "bar" ] +}, +{ + "key": "value" } ]""", }, { 'format': 'json', - 'indent_width': 4, + 'indent': 4, 'expected': b""" [ { "foo": [ "bar" ] +}, +{ + "key": "value" } ]""", }, { 'format': 'json', - 'indent_width': 5, + 'indent': 5, 'expected': b""" [ { "foo": [ "bar" ] +}, +{ + "key": "value" } ]""", }, @@ -500,12 +541,44 @@ class FeedExportTest(unittest.TestCase): # XML { 'format': 'xml', - 'indent_width': None, - 'expected': b'\nbar', + 'indent': None, + 'expected': b'\nbarvalue', }, { 'format': 'xml', - 'indent_width': 2, + 'indent': -1, + 'expected': b""" + + + + +bar + + + +value + +""", + }, + { + 'format': 'xml', + 'indent': 0, + 'expected': b""" + + + + +bar + + + +value + +""", + }, + { + 'format': 'xml', + 'indent': 2, 'expected': b""" @@ -514,11 +587,14 @@ class FeedExportTest(unittest.TestCase): bar + + value + """, }, { 'format': 'xml', - 'indent_width': 4, + 'indent': 4, 'expected': b""" @@ -527,11 +603,14 @@ class FeedExportTest(unittest.TestCase): bar + + value + """, }, { 'format': 'xml', - 'indent_width': 5, + 'indent': 5, 'expected': b""" @@ -540,11 +619,14 @@ class FeedExportTest(unittest.TestCase): bar + + value + """, }, ] for row in output: - settings = {'FEED_FORMAT': row['format'], 'FEED_EXPORT_INDENT_WIDTH': row['indent_width']} + settings = {'FEED_FORMAT': row['format'], 'FEED_EXPORT_INDENT': row['indent']} data = yield self.exported_data(items, settings) self.assertEqual(row['expected'].strip(), data) From c7bb2fa8ce2633d92a7ec2840f84b174a5494428 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Tue, 7 Mar 2017 11:55:26 -0300 Subject: [PATCH 3/6] Feed exports: consistent and backwards compatible behaviour on indent --- docs/topics/exporters.rst | 14 ++++---- docs/topics/feed-exports.rst | 6 ++-- scrapy/exporters.py | 14 +++++--- scrapy/settings/default_settings.py | 2 +- tests/test_feedexport.py | 56 ++++++++++------------------- 5 files changed, 39 insertions(+), 53 deletions(-) diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst index ad559fb35..2ad77c905 100644 --- a/docs/topics/exporters.rst +++ b/docs/topics/exporters.rst @@ -140,7 +140,7 @@ output examples, which assume you're exporting these two items:: BaseItemExporter ---------------- -.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent=None) +.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent=0) This is the (abstract) base class for all Item Exporters. It provides support for common features used by all (concrete) Item Exporters, such as @@ -218,12 +218,12 @@ BaseItemExporter .. attribute:: indent - Amount of spaces used to indent the output on each level. Defaults to ``None``, - which disables indentation. This argument behaves like ``indent`` in python's - JSON module (both for JSON and XML exporters): "If ``indent`` is a non-negative - integer, then array elements and object members will be pretty-printed with that - indent level. An indent level of 0, or negative, will only insert newlines. - ``None`` (the default) selects the most compact representation" + Amount of spaces used to indent the output on each level. Defaults to ``0``. + + * ``indent=None`` selects the most compact representation, + all items in the same line with no indentation + * ``indent<=0`` each item on it's own line, no indentation + * ``indent>0`` each item on it's own line, indentated with the provided numeric value .. highlight:: none diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst index afaa972e5..e57a4e776 100644 --- a/docs/topics/feed-exports.rst +++ b/docs/topics/feed-exports.rst @@ -272,12 +272,12 @@ exported data - currently it uses field names from the first item. FEED_EXPORT_INDENT ------------------ -Default: ``None`` +Default: ``0`` Amount of spaces used to indent the output on each level. If ``FEED_EXPORT_INDENT`` is a non-negative integer, then array elements and object members will be pretty-printed -with that indent level. An indent level of 0, or negative, will only insert newlines. -``None`` (the default) selects the most compact representation +with that indent level. An indent level of ``0``, or negative, will put each item on a new line. +``None`` selects the most compact representation Currently used by :class:`~scrapy.exporters.JsonItemExporter` and :class:`~scrapy.exporters.XmlItemExporter` diff --git a/scrapy/exporters.py b/scrapy/exporters.py index 1dfa2af85..e2d42b6ab 100644 --- a/scrapy/exporters.py +++ b/scrapy/exporters.py @@ -99,8 +99,12 @@ class JsonItemExporter(BaseItemExporter): def __init__(self, file, **kwargs): self._configure(kwargs, dont_fail=True) self.file = file + # there is a small difference between the behaviour or JsonItemExporter.indent + # and ScrapyJSONEncoder.indent. ScrapyJSONEncoder.indent=None is needed to prevent + # the addition of newlines everywhere + json_indent = self.indent if self.indent is not None and self.indent > 0 else None + kwargs.setdefault('indent', json_indent) kwargs.setdefault('ensure_ascii', not self.encoding) - kwargs.setdefault('indent', self.indent) self.encoder = ScrapyJSONEncoder(**kwargs) self.first_item = True @@ -137,8 +141,8 @@ class XmlItemExporter(BaseItemExporter): self.encoding = 'utf-8' self.xg = XMLGenerator(file, encoding=self.encoding) - def _beautify_newline(self): - if self.indent is not None: + def _beautify_newline(self, new_item=False): + if self.indent is not None and (self.indent > 0 or new_item): self._xg_characters('\n') def _beautify_indent(self, depth=1): @@ -148,7 +152,7 @@ class XmlItemExporter(BaseItemExporter): def start_exporting(self): self.xg.startDocument() self.xg.startElement(self.root_element, {}) - self._beautify_newline() + self._beautify_newline(new_item=True) def export_item(self, item): self._beautify_indent(depth=1) @@ -158,7 +162,7 @@ class XmlItemExporter(BaseItemExporter): self._export_xml_field(name, value, depth=2) self._beautify_indent(depth=1) self.xg.endElement(self.item_element) - self._beautify_newline() + self._beautify_newline(new_item=True) def finish_exporting(self): self.xg.endElement(self.root_element) diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index fc265e2ba..bbc02cfdb 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -161,7 +161,7 @@ FEED_EXPORTERS_BASE = { 'marshal': 'scrapy.exporters.MarshalItemExporter', 'pickle': 'scrapy.exporters.PickleItemExporter', } -FEED_EXPORT_INDENT = None +FEED_EXPORT_INDENT = 0 FILES_STORE_S3_ACL = 'private' diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index 2b82bba0c..c66c470a8 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -326,7 +326,7 @@ class FeedExportTest(unittest.TestCase): ) for fmt, expctd in formats: - settings = {'FEED_FORMAT': fmt, 'FEED_STORE_EMPTY': True} + settings = {'FEED_FORMAT': fmt, 'FEED_STORE_EMPTY': True, 'FEED_EXPORT_INDENT': None} data = yield self.exported_no_data(settings) self.assertEqual(data, expctd) @@ -451,9 +451,12 @@ class FeedExportTest(unittest.TestCase): @defer.inlineCallbacks def test_export_indentation(self): - items = [dict({'foo': ['bar']}), dict({'key': 'value'})] + items = [ + {'foo': ['bar']}, + {'key': 'value'}, + ] - output = [ + test_cases = [ # JSON { 'format': 'json', @@ -465,14 +468,8 @@ class FeedExportTest(unittest.TestCase): 'indent': -1, 'expected': b""" [ -{ -"foo": [ -"bar" -] -}, -{ -"key": "value" -} +{"foo": ["bar"]}, +{"key": "value"} ] """, }, @@ -481,14 +478,8 @@ class FeedExportTest(unittest.TestCase): 'indent': 0, 'expected': b""" [ -{ -"foo": [ -"bar" -] -}, -{ -"key": "value" -} +{"foo": ["bar"]}, +{"key": "value"} ] """, }, @@ -542,7 +533,9 @@ class FeedExportTest(unittest.TestCase): { 'format': 'xml', 'indent': None, - 'expected': b'\nbarvalue', + 'expected': b""" + +barvalue""", }, { 'format': 'xml', @@ -550,14 +543,8 @@ class FeedExportTest(unittest.TestCase): 'expected': b""" - - -bar - - - -value - +bar +value """, }, { @@ -566,14 +553,8 @@ class FeedExportTest(unittest.TestCase): 'expected': b""" - - -bar - - - -value - +bar +value """, }, { @@ -626,7 +607,8 @@ class FeedExportTest(unittest.TestCase): }, ] - for row in output: + for row in test_cases: settings = {'FEED_FORMAT': row['format'], 'FEED_EXPORT_INDENT': row['indent']} data = yield self.exported_data(items, settings) + print(row['format'], row['indent']) self.assertEqual(row['expected'].strip(), data) From 63b8caf5debf84e8da7f299782d15d0a41bf8a14 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Tue, 9 May 2017 11:58:53 -0300 Subject: [PATCH 4/6] Feed exports: rewrite indentation test without .strip() --- tests/test_feedexport.py | 41 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index c66c470a8..f55927121 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -466,28 +466,23 @@ class FeedExportTest(unittest.TestCase): { 'format': 'json', 'indent': -1, - 'expected': b""" -[ + 'expected': b"""[ {"foo": ["bar"]}, {"key": "value"} -] -""", +]""", }, { 'format': 'json', 'indent': 0, - 'expected': b""" -[ + 'expected': b"""[ {"foo": ["bar"]}, {"key": "value"} -] -""", +]""", }, { 'format': 'json', 'indent': 2, - 'expected': b""" -[ + 'expected': b"""[ { "foo": [ "bar" @@ -501,8 +496,7 @@ class FeedExportTest(unittest.TestCase): { 'format': 'json', 'indent': 4, - 'expected': b""" -[ + 'expected': b"""[ { "foo": [ "bar" @@ -516,8 +510,7 @@ class FeedExportTest(unittest.TestCase): { 'format': 'json', 'indent': 5, - 'expected': b""" -[ + 'expected': b"""[ { "foo": [ "bar" @@ -533,15 +526,13 @@ class FeedExportTest(unittest.TestCase): { 'format': 'xml', 'indent': None, - 'expected': b""" - + 'expected': b""" barvalue""", }, { 'format': 'xml', 'indent': -1, - 'expected': b""" - + 'expected': b""" bar value @@ -550,8 +541,7 @@ class FeedExportTest(unittest.TestCase): { 'format': 'xml', 'indent': 0, - 'expected': b""" - + 'expected': b""" bar value @@ -560,8 +550,7 @@ class FeedExportTest(unittest.TestCase): { 'format': 'xml', 'indent': 2, - 'expected': b""" - + 'expected': b""" @@ -576,8 +565,7 @@ class FeedExportTest(unittest.TestCase): { 'format': 'xml', 'indent': 4, - 'expected': b""" - + 'expected': b""" @@ -592,8 +580,7 @@ class FeedExportTest(unittest.TestCase): { 'format': 'xml', 'indent': 5, - 'expected': b""" - + 'expected': b""" @@ -611,4 +598,4 @@ class FeedExportTest(unittest.TestCase): settings = {'FEED_FORMAT': row['format'], 'FEED_EXPORT_INDENT': row['indent']} data = yield self.exported_data(items, settings) print(row['format'], row['indent']) - self.assertEqual(row['expected'].strip(), data) + self.assertEqual(row['expected'], data) From 25535dba9ca7e6f6f3c2279dd77240a21b1cc672 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 10 May 2017 16:45:15 -0300 Subject: [PATCH 5/6] Feed exports: edit note, fix typos --- docs/topics/exporters.rst | 4 ++-- docs/topics/feed-exports.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst index 2ad77c905..b6139af92 100644 --- a/docs/topics/exporters.rst +++ b/docs/topics/exporters.rst @@ -222,8 +222,8 @@ BaseItemExporter * ``indent=None`` selects the most compact representation, all items in the same line with no indentation - * ``indent<=0`` each item on it's own line, no indentation - * ``indent>0`` each item on it's own line, indentated with the provided numeric value + * ``indent<=0`` each item on its own line, no indentation + * ``indent>0`` each item on its own line, indented with the provided numeric value .. highlight:: none diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst index e57a4e776..d760b1a28 100644 --- a/docs/topics/feed-exports.rst +++ b/docs/topics/feed-exports.rst @@ -279,7 +279,7 @@ is a non-negative integer, then array elements and object members will be pretty with that indent level. An indent level of ``0``, or negative, will put each item on a new line. ``None`` selects the most compact representation -Currently used by :class:`~scrapy.exporters.JsonItemExporter` +Currently implemented only by :class:`~scrapy.exporters.JsonItemExporter` and :class:`~scrapy.exporters.XmlItemExporter` .. setting:: FEED_STORE_EMPTY From 3a0a86ed31df1d22fea3b5b05e853f212adc40c8 Mon Sep 17 00:00:00 2001 From: Paul Tremberth Date: Fri, 12 May 2017 17:26:17 +0200 Subject: [PATCH 6/6] Clarify FEED_EXPORT_INDENT section --- docs/topics/feed-exports.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst index d760b1a28..135d05c93 100644 --- a/docs/topics/feed-exports.rst +++ b/docs/topics/feed-exports.rst @@ -276,11 +276,12 @@ Default: ``0`` Amount of spaces used to indent the output on each level. If ``FEED_EXPORT_INDENT`` is a non-negative integer, then array elements and object members will be pretty-printed -with that indent level. An indent level of ``0``, or negative, will put each item on a new line. -``None`` selects the most compact representation +with that indent level. An indent level of ``0`` (the default), or negative, +will put each item on a new line. ``None`` selects the most compact representation. Currently implemented only by :class:`~scrapy.exporters.JsonItemExporter` -and :class:`~scrapy.exporters.XmlItemExporter` +and :class:`~scrapy.exporters.XmlItemExporter`, i.e. when you are exporting +to ``.json`` or ``.xml``. .. setting:: FEED_STORE_EMPTY