1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 12:43:41 +00:00

Some changes to newitem API and implementation:

- Dropped support for wildcard importing from the newitem package (fields must
  now be imported from newitem.fields, without using a wildcard)
- Removed assign() method from Fields as it was apparently redundant (with
  to_python() method) and I couldn't find any reason for keeping it (neither in
  the docs nor in the tests)
- Moved deiter() method of Field to StringField, as both its purpose and its
  implementation were specific to strings. If it's really needed as a
  general-purpose method, it could be restored. Also, no unit test was broken
  by this change, which sort of reinforces my point.
- Renamed (previously mentioned) StringField.deiter() method to
  StringField.to_single(), for better consistency with to_python() method
- Removed Field class as it was useless without the deiter() functionality (now
  belonging to StringField class)
- Moved ansi_date_re module variable to DateField class attribute
- Simplified implementation of DecimalField, FloatField and IntegerField to one
  line of code (using tests to make sure not to break any functionality)
- Renamed ItemMeta class (in models.py) to _ItemMeta to highlight its protected
  state (should not be externally imported)
- Added support for instantiating new items with dicts, to support
  deserializing items with their repr() string
- Added unit tests for the newly introduced functionality
This commit is contained in:
Pablo Hoffman 2009-07-11 22:19:56 -03:00
parent 5054b67a02
commit e3fe0ef297
5 changed files with 84 additions and 89 deletions

View File

@ -1,2 +1 @@
from scrapy.contrib_exp.newitem.models import Item
from scrapy.contrib_exp.newitem.fields import *

View File

@ -3,18 +3,10 @@ import decimal
import re
import time
__all__ = ['MultiValuedField', 'BooleanField', 'DateField', 'DateTimeField',
'DecimalField', 'FloatField', 'IntegerField', 'StringField']
class BaseField(object):
def __init__(self, default=None):
self.default = default or self.to_python(None)
def assign(self, value):
return self.to_python(value)
def to_python(self, value):
"""
Converts the input value into the expected Python data type.
@ -23,18 +15,6 @@ class BaseField(object):
return value
class Field(BaseField):
def assign(self, value):
if hasattr(value, '__iter__'):
return self.to_python(self.deiter(value))
else:
return self.to_python(value)
def deiter(self, value):
"Converts the input iterable into a single value."
return ' '.join(value)
class MultiValuedField(BaseField):
def __init__(self, field_type, default=None):
self._field = field_type()
@ -47,15 +27,14 @@ class MultiValuedField(BaseField):
return [self._field.to_python(v) for v in value]
class BooleanField(Field):
class BooleanField(BaseField):
def to_python(self, value):
return bool(value)
ansi_date_re = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$')
class DateField(BaseField):
ansi_date_re = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$')
class DateField(Field):
def to_python(self, value):
if value is None:
return value
@ -64,7 +43,7 @@ class DateField(Field):
if isinstance(value, datetime.date):
return value
if not ansi_date_re.search(value):
if not self.ansi_date_re.search(value):
raise ValueError("Enter a valid date in YYYY-MM-DD format.")
year, month, day = map(int, value.split('-'))
@ -74,7 +53,7 @@ class DateField(Field):
raise ValueError("Invalid date: %s" % str(e))
class DateTimeField(Field):
class DateTimeField(BaseField):
def to_python(self, value):
if value is None:
return value
@ -111,41 +90,34 @@ class DateTimeField(Field):
raise ValueError('Enter a valid date/time in YYYY-MM-DD HH:MM[:ss[.uuuuuu]] format.')
class DecimalField(Field):
class DecimalField(BaseField):
def to_python(self, value):
if value is None:
return value
try:
return decimal.Decimal(value)
except decimal.InvalidOperation:
raise ValueError("This value must be a decimal number.")
return decimal.Decimal(value) if value is not None else None
class FloatField(Field):
class FloatField(BaseField):
def to_python(self, value):
if value is None:
return value
try:
return float(value)
except (TypeError, ValueError):
raise ValueError("This value must be a float.")
return float(value) if value is not None else None
class IntegerField(Field):
class IntegerField(BaseField):
def to_python(self, value):
if value is None:
return value
try:
return int(value)
except (TypeError, ValueError):
raise ValueError("This value must be an integer.")
return int(value) if value is not None else None
class StringField(Field):
class StringField(BaseField):
def to_python(self, value):
if isinstance(value, basestring):
if hasattr(value, '__iter__'):
return self.to_python(self.to_single(value))
elif isinstance(value, basestring):
return value
if value is None:
elif value is None:
return value
raise ValueError("This field must be a string.")
else:
raise ValueError("StringField expects a basestring, got %s" \
% type(value).__name__)
def to_single(self, value):
"Converts the input iterable into a single value."
return ' '.join(value)

View File

@ -2,40 +2,45 @@ from scrapy.item import ScrapedItem
from scrapy.contrib_exp.newitem.fields import BaseField
class ItemMeta(type):
class _ItemMeta(type):
def __new__(meta, class_name, bases, attrs):
cls = type.__new__(meta, class_name, bases, attrs)
cls.fields = cls.fields.copy()
for n, v in attrs.items():
for n, v in attrs.iteritems():
if isinstance(v, BaseField):
cls.fields[n] = v
return cls
class Item(ScrapedItem):
""" This is the base class for all scraped items. """
__metaclass__ = ItemMeta
__metaclass__ = _ItemMeta
fields = {}
def __init__(self):
def __init__(self, values=None):
self._values = {}
if isinstance(values, dict):
for k, v in values.iteritems():
setattr(self, k, v)
elif values is not None:
raise TypeError("Items must be instantiated with dicts, got %s" % \
type(values).__name__)
def __setattr__(self, name, value):
if name.startswith('_'):
return object.__setattr__(self, name, value)
return ScrapedItem.__setattr__(self, name, value)
if name in self.fields.keys():
self._values[name] = self.fields[name].assign(value)
self._values[name] = self.fields[name].to_python(value)
else:
raise AttributeError(name)
def __getattribute__(self, name):
if name.startswith('_') or name == 'fields':
return object.__getattribute__(self, name)
return ScrapedItem.__getattribute__(self, name)
if name in self.fields.keys():
try:
@ -46,9 +51,8 @@ class Item(ScrapedItem):
raise AttributeError(name)
def __repr__(self):
"""Generate a representation of this item that can be used to
reconstruct the item by evaluating it
"""
Generate the following format so that items can be deserialized
easily: ClassName({'attrib': value, ...})
"""
reprdict = dict((field, getattr(self, field)) for field in self.fields)
return "%s(%s)" % (self.__class__.__name__, repr(reprdict))
values = dict((field, getattr(self, field)) for field in self.fields)
return "%s(%s)" % (self.__class__.__name__, repr(values))

View File

@ -1,16 +1,16 @@
import unittest
import string
from scrapy.contrib_exp.newitem.adaptors import adaptor, ItemAdaptor
from scrapy.contrib_exp.newitem import *
from scrapy.contrib_exp.newitem import Item, fields
class BaseItem(Item):
name = StringField()
name = fields.StringField()
class TestItem(BaseItem):
url = StringField()
summary = StringField()
url = fields.StringField()
summary = fields.StringField()
class BaseAdaptor(ItemAdaptor):
@ -30,7 +30,7 @@ class InheritDefaultAdaptor(DefaultedAdaptor):
class MultiValuedTestItem(Item):
names = MultiValuedField(StringField)
names = fields.MultiValuedField(fields.StringField)
class MultiValuedItemAdaptor(ItemAdaptor):

View File

@ -2,7 +2,7 @@ import datetime
import decimal
import unittest
from scrapy.contrib_exp.newitem import *
from scrapy.contrib_exp.newitem import Item, fields
from scrapy.contrib_exp.newitem.fields import BaseField
@ -10,16 +10,30 @@ class NewItemTest(unittest.TestCase):
def test_simple(self):
class TestItem(Item):
name = StringField()
name = fields.StringField()
i = TestItem()
i.name = 'name'
assert i.name == 'name'
def test_init(self):
class TestItem(Item):
name = fields.StringField()
i = TestItem()
assert i.name is None
i2 = TestItem({'name': 'john doe'})
assert i2.name == 'john doe'
self.assertRaises(TypeError, TestItem, name='john doe')
self.assertRaises(AttributeError, TestItem, {'name': 'john doe', 'other': 'foo'})
def test_multi(self):
class TestMultiItem(Item):
name = StringField()
names = MultiValuedField(StringField)
name = fields.StringField()
names = fields.MultiValuedField(fields.StringField)
i = TestMultiItem()
i.name = 'name'
@ -43,14 +57,14 @@ class NewItemTest(unittest.TestCase):
def test_default_value(self):
class TestItem(Item):
name = StringField(default='John')
name = fields.StringField(default='John')
i = TestItem()
assert i.name == 'John'
def test_topython_iter(self):
def test_to_python_iter(self):
class TestItem(Item):
name = StringField()
name = fields.StringField()
i = TestItem()
i.name = ('John', 'Doe')
@ -58,25 +72,31 @@ class NewItemTest(unittest.TestCase):
def test_repr(self):
class TestItem(Item):
name = StringField()
name = fields.StringField()
number = fields.IntegerField()
i = TestItem()
i.name = 'John Doe'
assert i.__repr__() == "TestItem({'name': 'John Doe'})"
i.number = '123'
itemrepr = repr(i)
assert itemrepr == "TestItem({'name': 'John Doe', 'number': 123})"
i2 = eval(itemrepr)
assert i2.name == 'John Doe'
assert i2.number == 123
class NewItemFieldsTest(unittest.TestCase):
def test_base_field(self):
f = BaseField()
f = fields.BaseField()
assert f.default == None
assert f.assign(1) == 1
assert f.to_python(1) == 1
def test_boolean_field(self):
class TestItem(Item):
field = BooleanField()
field = fields.BooleanField()
i = TestItem()
@ -94,7 +114,7 @@ class NewItemFieldsTest(unittest.TestCase):
def test_date_field(self):
class TestItem(Item):
field = DateField()
field = fields.DateField()
i = TestItem()
@ -121,7 +141,7 @@ class NewItemFieldsTest(unittest.TestCase):
def test_datetime_field(self):
class TestItem(Item):
field = DateTimeField()
field = fields.DateTimeField()
i = TestItem()
@ -163,7 +183,7 @@ class NewItemFieldsTest(unittest.TestCase):
def test_decimal_field(self):
class TestItem(Item):
field = DecimalField()
field = fields.DecimalField()
i = TestItem()
@ -176,11 +196,11 @@ class NewItemFieldsTest(unittest.TestCase):
def set_invalid_value():
i.field = 'text'
self.assertRaises(ValueError, set_invalid_value)
self.assertRaises(decimal.InvalidOperation, set_invalid_value)
def test_float_field(self):
class TestItem(Item):
field = FloatField()
field = fields.FloatField()
i = TestItem()
@ -197,7 +217,7 @@ class NewItemFieldsTest(unittest.TestCase):
def test_integer_field(self):
class TestItem(Item):
field = IntegerField()
field = fields.IntegerField()
i = TestItem()
@ -214,7 +234,7 @@ class NewItemFieldsTest(unittest.TestCase):
def test_string_field(self):
class TestItem(Item):
field = StringField()
field = fields.StringField()
i = TestItem()