1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 04:43:51 +00:00

added a python native classes item exporter

This commit is contained in:
olveyra 2013-08-13 13:48:28 +00:00
parent ed5b9068d2
commit 456b6f2ef5
2 changed files with 58 additions and 4 deletions

View File

@ -9,7 +9,7 @@ import json
import cPickle as pickle
from xml.sax.saxutils import XMLGenerator
from scrapy.utils.serialize import ScrapyJSONEncoder
from scrapy.item import BaseItem
__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter', \
'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter', \
@ -200,7 +200,6 @@ class MarshalItemExporter(BaseItemExporter):
def export_item(self, item):
    """Write ``item`` to ``self.file`` in marshal format.

    The pairs produced by ``self._get_serialized_fields(item)`` are
    collected into a plain ``dict`` first, and that dict is what gets
    marshalled.
    """
    serialized = dict(self._get_serialized_fields(item))
    marshal.dump(serialized, self.file)
class PprintItemExporter(BaseItemExporter):
def __init__(self, file, **kwargs):
@ -210,3 +209,25 @@ class PprintItemExporter(BaseItemExporter):
def export_item(self, item):
    """Write a pretty-printed representation of ``item`` to ``self.file``.

    The pairs produced by ``self._get_serialized_fields(item)`` are
    collected into a dict, formatted with ``pprint.pformat`` and written
    followed by a single newline.
    """
    rendered = pprint.pformat(dict(self._get_serialized_fields(item)))
    self.file.write(rendered + '\n')
class PythonItemExporter(BaseItemExporter):
    """Exporter whose ``export_item`` returns a ``dict`` instead of writing
    to a file.

    Field values that are items, dicts or other iterables are converted
    recursively, so nested items end up as nested dicts and lists.
    """

    def export_item(self, item):
        """Return ``item`` as a ``dict`` built from the pairs yielded by
        ``self._get_serialized_fields(item)``."""
        serialized_pairs = self._get_serialized_fields(item)
        return dict(serialized_pairs)

    def serialize_field(self, field, name, value):
        """Serialize one field value.

        Uses the callable stored under the field's ``'serializer'`` key when
        present; otherwise falls back to ``self._serialize_value``. ``name``
        is not used by this implementation.
        """
        return field.get('serializer', self._serialize_value)(value)

    def _serialize_dict(self, value):
        """Yield ``(key, serialized_value)`` pairs for the dict ``value``.

        Keys are passed through unchanged; only values are converted.
        """
        for entry_key, entry_value in value.iteritems():
            yield entry_key, self._serialize_value(entry_value)

    def _serialize_value(self, value):
        """Recursively convert ``value``.

        Items are exported via ``export_item``, dicts are rebuilt with
        converted values, and any other object exposing ``__iter__`` becomes
        a list of converted elements. Everything else is handed to
        ``self._to_str_if_unicode``.
        """
        if isinstance(value, BaseItem):
            converted = self.export_item(value)
        elif isinstance(value, dict):
            converted = dict(self._serialize_dict(value))
        elif hasattr(value, '__iter__'):
            # NOTE: on Python 2 (which this module targets, see cPickle and
            # iteritems) str/unicode do not define __iter__, so plain
            # strings fall through to the final branch.
            converted = [self._serialize_value(element) for element in value]
        else:
            # presumably encodes unicode to str -- confirm against the
            # base-class helper, which is not visible here.
            converted = self._to_str_if_unicode(value)
        return converted

View File

@ -5,7 +5,7 @@ from scrapy.item import Item, Field
from scrapy.utils.python import str_to_unicode
from scrapy.contrib.exporter import BaseItemExporter, PprintItemExporter, \
PickleItemExporter, CsvItemExporter, XmlItemExporter, JsonLinesItemExporter, \
JsonItemExporter
JsonItemExporter, PythonItemExporter
class TestItem(Item):
name = Field()
@ -69,7 +69,41 @@ class BaseItemExporterTest(unittest.TestCase):
self.assertEqual(ie.serialize_field(i.fields['name'], 'name', i['name']), 'John\xc2\xa3')
self.assertEqual(ie.serialize_field(i.fields['age'], 'age', i['age']), '24')
class PythonItemExporterTest(BaseItemExporterTest):
    """Tests for PythonItemExporter: nested items, lists of items and dicts
    holding items must all come out as plain ``dict``/``list`` structures."""

    def _get_exporter(self, **kwargs):
        # Hook used to build the exporter under test.
        return PythonItemExporter(**kwargs)

    def test_nested_item(self):
        """Items nested directly inside items are exported as nested dicts."""
        innermost = TestItem(name=u'Joseph', age='22')
        middle = TestItem(name=u'Maria', age=innermost)
        outermost = TestItem(name=u'Jesus', age=middle)
        exporter = self._get_exporter()
        exported = exporter.export_item(outermost)
        expected = {
            'age': {
                'age': {'age': '22', 'name': u'Joseph'},
                'name': u'Maria',
            },
            'name': 'Jesus',
        }
        self.assertEqual(type(exported), dict)
        self.assertEqual(exported, expected)
        first_level = exported['age']
        self.assertEqual(type(first_level), dict)
        self.assertEqual(type(exported['age']['age']), dict)

    def test_export_list(self):
        """Items wrapped in lists are exported as lists of dicts."""
        innermost = TestItem(name=u'Joseph', age='22')
        middle = TestItem(name=u'Maria', age=[innermost])
        outermost = TestItem(name=u'Jesus', age=[middle])
        exporter = self._get_exporter()
        exported = exporter.export_item(outermost)
        expected = {
            'age': [
                {'age': [{'age': '22', 'name': u'Joseph'}], 'name': u'Maria'},
            ],
            'name': 'Jesus',
        }
        self.assertEqual(exported, expected)
        self.assertEqual(type(exported['age'][0]), dict)
        self.assertEqual(type(exported['age'][0]['age'][0]), dict)

    def test_export_item_dict_list(self):
        """A plain dict holding a list of items is converted the same way."""
        innermost = TestItem(name=u'Joseph', age='22')
        middle = dict(name=u'Maria', age=[innermost])
        outermost = TestItem(name=u'Jesus', age=[middle])
        exporter = self._get_exporter()
        exported = exporter.export_item(outermost)
        expected = {
            'age': [
                {'age': [{'age': '22', 'name': u'Joseph'}], 'name': u'Maria'},
            ],
            'name': 'Jesus',
        }
        self.assertEqual(exported, expected)
        self.assertEqual(type(exported['age'][0]), dict)
        self.assertEqual(type(exported['age'][0]['age'][0]), dict)
class PprintItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs):
@ -78,7 +112,6 @@ class PprintItemExporterTest(BaseItemExporterTest):
def _check_output(self):
    """Evaluate the text held in ``self.output`` and check the resulting
    object with ``self._assert_expected_item``."""
    # NOTE(review): eval() is applied to self.output, presumably the
    # exporter's own pprint output captured by the test -- never reuse this
    # pattern on untrusted text.
    written_text = self.output.getvalue()
    parsed_item = eval(written_text)
    self._assert_expected_item(parsed_item)
class PickleItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs):