diff --git a/scrapy/contrib_exp/newitem/__init__.py b/scrapy/contrib_exp/newitem/__init__.py index a4c3e81df..bde80fc7e 100644 --- a/scrapy/contrib_exp/newitem/__init__.py +++ b/scrapy/contrib_exp/newitem/__init__.py @@ -1,2 +1 @@ from scrapy.contrib_exp.newitem.models import Item -from scrapy.contrib_exp.newitem.fields import * diff --git a/scrapy/contrib_exp/newitem/fields.py b/scrapy/contrib_exp/newitem/fields.py index 49d79a481..15773d3ab 100644 --- a/scrapy/contrib_exp/newitem/fields.py +++ b/scrapy/contrib_exp/newitem/fields.py @@ -3,18 +3,10 @@ import decimal import re import time - -__all__ = ['MultiValuedField', 'BooleanField', 'DateField', 'DateTimeField', - 'DecimalField', 'FloatField', 'IntegerField', 'StringField'] - - class BaseField(object): def __init__(self, default=None): self.default = default or self.to_python(None) - def assign(self, value): - return self.to_python(value) - def to_python(self, value): """ Converts the input value into the expected Python data type. @@ -23,18 +15,6 @@ class BaseField(object): return value -class Field(BaseField): - def assign(self, value): - if hasattr(value, '__iter__'): - return self.to_python(self.deiter(value)) - else: - return self.to_python(value) - - def deiter(self, value): - "Converts the input iterable into a single value." - return ' '.join(value) - - class MultiValuedField(BaseField): def __init__(self, field_type, default=None): self._field = field_type() @@ -47,15 +27,14 @@ class MultiValuedField(BaseField): return [self._field.to_python(v) for v in value] -class BooleanField(Field): +class BooleanField(BaseField): def to_python(self, value): return bool(value) -ansi_date_re = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$') +class DateField(BaseField): + ansi_date_re = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$') - -class DateField(Field): def to_python(self, value): if value is None: return value @@ -64,7 +43,7 @@ class DateField(Field): if isinstance(value, datetime.date): return value - if not ansi_date_re.search(value): + if not self.ansi_date_re.search(value): raise ValueError("Enter a valid date in YYYY-MM-DD format.") year, month, day = map(int, value.split('-')) @@ -74,7 +53,7 @@ class DateField(Field): raise ValueError("Invalid date: %s" % str(e)) -class DateTimeField(Field): +class DateTimeField(BaseField): def to_python(self, value): if value is None: return value @@ -111,41 +90,34 @@ class DateTimeField(Field): raise ValueError('Enter a valid date/time in YYYY-MM-DD HH:MM[:ss[.uuuuuu]] format.') -class DecimalField(Field): +class DecimalField(BaseField): def to_python(self, value): - if value is None: - return value - try: - return decimal.Decimal(value) - except decimal.InvalidOperation: - raise ValueError("This value must be a decimal number.") + return decimal.Decimal(value) if value is not None else None -class FloatField(Field): +class FloatField(BaseField): def to_python(self, value): - if value is None: - return value - try: - return float(value) - except (TypeError, ValueError): - raise ValueError("This value must be a float.") + return float(value) if value is not None else None -class IntegerField(Field): +class IntegerField(BaseField): def to_python(self, value): - if value is None: - return value - try: - return int(value) - except (TypeError, ValueError): - raise ValueError("This value must be an integer.") + return int(value) if value is not None else None -class StringField(Field): +class StringField(BaseField): def to_python(self, value): - if isinstance(value, basestring): + if hasattr(value, '__iter__'): + return self.to_python(self.to_single(value)) + elif isinstance(value, basestring): return value - if value is None: + elif value is None: return value - raise ValueError("This field must be a string.") + else: + raise ValueError("StringField expects a basestring, got %s" \ + % type(value).__name__) + + def to_single(self, value): + "Converts the input iterable into a single value." + return ' '.join(value) diff --git a/scrapy/contrib_exp/newitem/models.py b/scrapy/contrib_exp/newitem/models.py index b29a3a854..b84de9b1a 100644 --- a/scrapy/contrib_exp/newitem/models.py +++ b/scrapy/contrib_exp/newitem/models.py @@ -2,40 +2,45 @@ from scrapy.item import ScrapedItem from scrapy.contrib_exp.newitem.fields import BaseField -class ItemMeta(type): +class _ItemMeta(type): def __new__(meta, class_name, bases, attrs): cls = type.__new__(meta, class_name, bases, attrs) cls.fields = cls.fields.copy() - for n, v in attrs.items(): + for n, v in attrs.iteritems(): if isinstance(v, BaseField): cls.fields[n] = v - return cls class Item(ScrapedItem): """ This is the base class for all scraped items. """ - __metaclass__ = ItemMeta + __metaclass__ = _ItemMeta fields = {} - def __init__(self): + def __init__(self, values=None): self._values = {} + if isinstance(values, dict): + for k, v in values.iteritems(): + setattr(self, k, v) + elif values is not None: + raise TypeError("Items must be instantiated with dicts, got %s" % \ + type(values).__name__) def __setattr__(self, name, value): if name.startswith('_'): - return object.__setattr__(self, name, value) + return ScrapedItem.__setattr__(self, name, value) if name in self.fields.keys(): - self._values[name] = self.fields[name].assign(value) + self._values[name] = self.fields[name].to_python(value) else: raise AttributeError(name) def __getattribute__(self, name): if name.startswith('_') or name == 'fields': - return object.__getattribute__(self, name) + return ScrapedItem.__getattribute__(self, name) if name in self.fields.keys(): try: @@ -46,9 +51,8 @@ class Item(ScrapedItem): raise AttributeError(name) def __repr__(self): + """Generate a representation of this item that can be used to + reconstruct the item by evaluating it """ - Generate the following format so that items can be deserialized - easily: ClassName({'attrib': value, ...}) - """ - reprdict = dict((field, getattr(self, field)) for field in self.fields) - return "%s(%s)" % (self.__class__.__name__, repr(reprdict)) + values = dict((field, getattr(self, field)) for field in self.fields) + return "%s(%s)" % (self.__class__.__name__, repr(values)) diff --git a/scrapy/tests/test_itemadaptor.py b/scrapy/tests/test_itemadaptor.py index c6d606e7b..4bc3e5308 100644 --- a/scrapy/tests/test_itemadaptor.py +++ b/scrapy/tests/test_itemadaptor.py @@ -1,16 +1,16 @@ import unittest import string from scrapy.contrib_exp.newitem.adaptors import adaptor, ItemAdaptor -from scrapy.contrib_exp.newitem import * +from scrapy.contrib_exp.newitem import Item, fields class BaseItem(Item): - name = StringField() + name = fields.StringField() class TestItem(BaseItem): - url = StringField() - summary = StringField() + url = fields.StringField() + summary = fields.StringField() class BaseAdaptor(ItemAdaptor): @@ -30,7 +30,7 @@ class InheritDefaultAdaptor(DefaultedAdaptor): class MultiValuedTestItem(Item): - names = MultiValuedField(StringField) + names = fields.MultiValuedField(fields.StringField) class MultiValuedItemAdaptor(ItemAdaptor): diff --git a/scrapy/tests/test_newitem.py b/scrapy/tests/test_newitem.py index 4d6278b88..68b72087d 100644 --- a/scrapy/tests/test_newitem.py +++ b/scrapy/tests/test_newitem.py @@ -2,7 +2,7 @@ import datetime import decimal import unittest -from scrapy.contrib_exp.newitem import * +from scrapy.contrib_exp.newitem import Item, fields from scrapy.contrib_exp.newitem.fields import BaseField @@ -10,16 +10,30 @@ class NewItemTest(unittest.TestCase): def test_simple(self): class TestItem(Item): - name = StringField() + name = fields.StringField() i = TestItem() i.name = 'name' assert i.name == 'name' + def test_init(self): + class TestItem(Item): + name = fields.StringField() + + i = TestItem() + assert i.name is None + + i2 = TestItem({'name': 'john doe'}) + assert i2.name == 'john doe' + + self.assertRaises(TypeError, TestItem, name='john doe') + + self.assertRaises(AttributeError, TestItem, {'name': 'john doe', 'other': 'foo'}) + def test_multi(self): class TestMultiItem(Item): - name = StringField() - names = MultiValuedField(StringField) + name = fields.StringField() + names = fields.MultiValuedField(fields.StringField) i = TestMultiItem() i.name = 'name' @@ -43,14 +57,14 @@ class NewItemTest(unittest.TestCase): def test_default_value(self): class TestItem(Item): - name = StringField(default='John') + name = fields.StringField(default='John') i = TestItem() assert i.name == 'John' - def test_topython_iter(self): + def test_to_python_iter(self): class TestItem(Item): - name = StringField() + name = fields.StringField() i = TestItem() i.name = ('John', 'Doe') @@ -58,25 +72,31 @@ class NewItemTest(unittest.TestCase): def test_repr(self): class TestItem(Item): - name = StringField() + name = fields.StringField() + number = fields.IntegerField() i = TestItem() i.name = 'John Doe' - assert i.__repr__() == "TestItem({'name': 'John Doe'})" + i.number = '123' + itemrepr = repr(i) + assert itemrepr == "TestItem({'name': 'John Doe', 'number': 123})" + + i2 = eval(itemrepr) + assert i2.name == 'John Doe' + assert i2.number == 123 class NewItemFieldsTest(unittest.TestCase): def test_base_field(self): - f = BaseField() + f = fields.BaseField() assert f.default == None - assert f.assign(1) == 1 assert f.to_python(1) == 1 def test_boolean_field(self): class TestItem(Item): - field = BooleanField() + field = fields.BooleanField() i = TestItem() @@ -94,7 +114,7 @@ class NewItemFieldsTest(unittest.TestCase): def test_date_field(self): class TestItem(Item): - field = DateField() + field = fields.DateField() i = TestItem() @@ -121,7 +141,7 @@ class NewItemFieldsTest(unittest.TestCase): def test_datetime_field(self): class TestItem(Item): - field = DateTimeField() + field = fields.DateTimeField() i = TestItem() @@ -163,7 +183,7 @@ class NewItemFieldsTest(unittest.TestCase): def test_decimal_field(self): class TestItem(Item): - field = DecimalField() + field = fields.DecimalField() i = TestItem() @@ -176,11 +196,11 @@ class NewItemFieldsTest(unittest.TestCase): def set_invalid_value(): i.field = 'text' - self.assertRaises(ValueError, set_invalid_value) + self.assertRaises(decimal.InvalidOperation, set_invalid_value) def test_float_field(self): class TestItem(Item): - field = FloatField() + field = fields.FloatField() i = TestItem() @@ -197,7 +217,7 @@ class NewItemFieldsTest(unittest.TestCase): def test_integer_field(self): class TestItem(Item): - field = IntegerField() + field = fields.IntegerField() i = TestItem() @@ -214,7 +234,7 @@ class NewItemFieldsTest(unittest.TestCase): def test_string_field(self): class TestItem(Item): - field = StringField() + field = fields.StringField() i = TestItem()