mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 12:43:41 +00:00
Some changes to newitem API and implementation:
- Dropped support for wildcard importing from the newitem package (you must now import explicitly from newitem.fields, without using a wildcard).
- Removed the assign() method from Fields, as it was apparently redundant with the to_python() method and I couldn't find any reason for keeping it (neither in the docs nor in the tests).
- Moved the deiter() method of Field to StringField, as both its purpose and its implementation were specific to strings. If it's really needed as a general-purpose method, it could be restored. Also, no unit test was broken by this change, which sort of reinforces my point.
- Renamed the (previously mentioned) StringField.deiter() method to StringField.to_single(), for better consistency with the to_python() method.
- Removed the Field class, as it was useless without the deiter() functionality (which now belongs to the StringField class).
- Moved the ansi_date_re module variable to a DateField class attribute.
- Simplified the implementation of DecimalField, FloatField and IntegerField to one line of code each (using the tests to make sure no functionality was broken).
- Renamed the ItemMeta class (in models.py) to _ItemMeta to highlight its protected status (it should not be imported externally).
- Added support for instantiating new items with dicts, to support deserializing items from their repr() string.
- Added unit tests for the new functionality introduced.
This commit is contained in:
parent
5054b67a02
commit
e3fe0ef297
@ -1,2 +1 @@
|
||||
from scrapy.contrib_exp.newitem.models import Item
|
||||
from scrapy.contrib_exp.newitem.fields import *
|
||||
|
@ -3,18 +3,10 @@ import decimal
|
||||
import re
|
||||
import time
|
||||
|
||||
|
||||
__all__ = ['MultiValuedField', 'BooleanField', 'DateField', 'DateTimeField',
|
||||
'DecimalField', 'FloatField', 'IntegerField', 'StringField']
|
||||
|
||||
|
||||
class BaseField(object):
|
||||
def __init__(self, default=None):
|
||||
self.default = default or self.to_python(None)
|
||||
|
||||
def assign(self, value):
|
||||
return self.to_python(value)
|
||||
|
||||
def to_python(self, value):
|
||||
"""
|
||||
Converts the input value into the expected Python data type.
|
||||
@ -23,18 +15,6 @@ class BaseField(object):
|
||||
return value
|
||||
|
||||
|
||||
class Field(BaseField):
|
||||
def assign(self, value):
|
||||
if hasattr(value, '__iter__'):
|
||||
return self.to_python(self.deiter(value))
|
||||
else:
|
||||
return self.to_python(value)
|
||||
|
||||
def deiter(self, value):
|
||||
"Converts the input iterable into a single value."
|
||||
return ' '.join(value)
|
||||
|
||||
|
||||
class MultiValuedField(BaseField):
|
||||
def __init__(self, field_type, default=None):
|
||||
self._field = field_type()
|
||||
@ -47,15 +27,14 @@ class MultiValuedField(BaseField):
|
||||
return [self._field.to_python(v) for v in value]
|
||||
|
||||
|
||||
class BooleanField(Field):
|
||||
class BooleanField(BaseField):
|
||||
def to_python(self, value):
|
||||
return bool(value)
|
||||
|
||||
|
||||
ansi_date_re = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$')
|
||||
class DateField(BaseField):
|
||||
ansi_date_re = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$')
|
||||
|
||||
|
||||
class DateField(Field):
|
||||
def to_python(self, value):
|
||||
if value is None:
|
||||
return value
|
||||
@ -64,7 +43,7 @@ class DateField(Field):
|
||||
if isinstance(value, datetime.date):
|
||||
return value
|
||||
|
||||
if not ansi_date_re.search(value):
|
||||
if not self.ansi_date_re.search(value):
|
||||
raise ValueError("Enter a valid date in YYYY-MM-DD format.")
|
||||
|
||||
year, month, day = map(int, value.split('-'))
|
||||
@ -74,7 +53,7 @@ class DateField(Field):
|
||||
raise ValueError("Invalid date: %s" % str(e))
|
||||
|
||||
|
||||
class DateTimeField(Field):
|
||||
class DateTimeField(BaseField):
|
||||
def to_python(self, value):
|
||||
if value is None:
|
||||
return value
|
||||
@ -111,41 +90,34 @@ class DateTimeField(Field):
|
||||
raise ValueError('Enter a valid date/time in YYYY-MM-DD HH:MM[:ss[.uuuuuu]] format.')
|
||||
|
||||
|
||||
class DecimalField(Field):
|
||||
class DecimalField(BaseField):
|
||||
def to_python(self, value):
|
||||
if value is None:
|
||||
return value
|
||||
try:
|
||||
return decimal.Decimal(value)
|
||||
except decimal.InvalidOperation:
|
||||
raise ValueError("This value must be a decimal number.")
|
||||
return decimal.Decimal(value) if value is not None else None
|
||||
|
||||
|
||||
class FloatField(Field):
|
||||
class FloatField(BaseField):
|
||||
def to_python(self, value):
|
||||
if value is None:
|
||||
return value
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
raise ValueError("This value must be a float.")
|
||||
return float(value) if value is not None else None
|
||||
|
||||
|
||||
class IntegerField(Field):
|
||||
class IntegerField(BaseField):
|
||||
def to_python(self, value):
|
||||
if value is None:
|
||||
return value
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
raise ValueError("This value must be an integer.")
|
||||
return int(value) if value is not None else None
|
||||
|
||||
|
||||
class StringField(Field):
|
||||
class StringField(BaseField):
|
||||
def to_python(self, value):
|
||||
if isinstance(value, basestring):
|
||||
if hasattr(value, '__iter__'):
|
||||
return self.to_python(self.to_single(value))
|
||||
elif isinstance(value, basestring):
|
||||
return value
|
||||
if value is None:
|
||||
elif value is None:
|
||||
return value
|
||||
raise ValueError("This field must be a string.")
|
||||
else:
|
||||
raise ValueError("StringField expects a basestring, got %s" \
|
||||
% type(value).__name__)
|
||||
|
||||
def to_single(self, value):
|
||||
"Converts the input iterable into a single value."
|
||||
return ' '.join(value)
|
||||
|
||||
|
@ -2,40 +2,45 @@ from scrapy.item import ScrapedItem
|
||||
from scrapy.contrib_exp.newitem.fields import BaseField
|
||||
|
||||
|
||||
class ItemMeta(type):
|
||||
class _ItemMeta(type):
|
||||
|
||||
def __new__(meta, class_name, bases, attrs):
|
||||
cls = type.__new__(meta, class_name, bases, attrs)
|
||||
cls.fields = cls.fields.copy()
|
||||
for n, v in attrs.items():
|
||||
for n, v in attrs.iteritems():
|
||||
if isinstance(v, BaseField):
|
||||
cls.fields[n] = v
|
||||
|
||||
return cls
|
||||
|
||||
|
||||
class Item(ScrapedItem):
|
||||
""" This is the base class for all scraped items. """
|
||||
|
||||
__metaclass__ = ItemMeta
|
||||
__metaclass__ = _ItemMeta
|
||||
|
||||
fields = {}
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, values=None):
|
||||
self._values = {}
|
||||
if isinstance(values, dict):
|
||||
for k, v in values.iteritems():
|
||||
setattr(self, k, v)
|
||||
elif values is not None:
|
||||
raise TypeError("Items must be instantiated with dicts, got %s" % \
|
||||
type(values).__name__)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name.startswith('_'):
|
||||
return object.__setattr__(self, name, value)
|
||||
return ScrapedItem.__setattr__(self, name, value)
|
||||
|
||||
if name in self.fields.keys():
|
||||
self._values[name] = self.fields[name].assign(value)
|
||||
self._values[name] = self.fields[name].to_python(value)
|
||||
else:
|
||||
raise AttributeError(name)
|
||||
|
||||
def __getattribute__(self, name):
|
||||
if name.startswith('_') or name == 'fields':
|
||||
return object.__getattribute__(self, name)
|
||||
return ScrapedItem.__getattribute__(self, name)
|
||||
|
||||
if name in self.fields.keys():
|
||||
try:
|
||||
@ -46,9 +51,8 @@ class Item(ScrapedItem):
|
||||
raise AttributeError(name)
|
||||
|
||||
def __repr__(self):
|
||||
"""Generate a representation of this item that can be used to
|
||||
reconstruct the item by evaluating it
|
||||
"""
|
||||
Generate the following format so that items can be deserialized
|
||||
easily: ClassName({'attrib': value, ...})
|
||||
"""
|
||||
reprdict = dict((field, getattr(self, field)) for field in self.fields)
|
||||
return "%s(%s)" % (self.__class__.__name__, repr(reprdict))
|
||||
values = dict((field, getattr(self, field)) for field in self.fields)
|
||||
return "%s(%s)" % (self.__class__.__name__, repr(values))
|
||||
|
@ -1,16 +1,16 @@
|
||||
import unittest
|
||||
import string
|
||||
from scrapy.contrib_exp.newitem.adaptors import adaptor, ItemAdaptor
|
||||
from scrapy.contrib_exp.newitem import *
|
||||
from scrapy.contrib_exp.newitem import Item, fields
|
||||
|
||||
|
||||
class BaseItem(Item):
|
||||
name = StringField()
|
||||
name = fields.StringField()
|
||||
|
||||
|
||||
class TestItem(BaseItem):
|
||||
url = StringField()
|
||||
summary = StringField()
|
||||
url = fields.StringField()
|
||||
summary = fields.StringField()
|
||||
|
||||
|
||||
class BaseAdaptor(ItemAdaptor):
|
||||
@ -30,7 +30,7 @@ class InheritDefaultAdaptor(DefaultedAdaptor):
|
||||
|
||||
|
||||
class MultiValuedTestItem(Item):
|
||||
names = MultiValuedField(StringField)
|
||||
names = fields.MultiValuedField(fields.StringField)
|
||||
|
||||
|
||||
class MultiValuedItemAdaptor(ItemAdaptor):
|
||||
|
@ -2,7 +2,7 @@ import datetime
|
||||
import decimal
|
||||
import unittest
|
||||
|
||||
from scrapy.contrib_exp.newitem import *
|
||||
from scrapy.contrib_exp.newitem import Item, fields
|
||||
from scrapy.contrib_exp.newitem.fields import BaseField
|
||||
|
||||
|
||||
@ -10,16 +10,30 @@ class NewItemTest(unittest.TestCase):
|
||||
|
||||
def test_simple(self):
|
||||
class TestItem(Item):
|
||||
name = StringField()
|
||||
name = fields.StringField()
|
||||
|
||||
i = TestItem()
|
||||
i.name = 'name'
|
||||
assert i.name == 'name'
|
||||
|
||||
def test_init(self):
|
||||
class TestItem(Item):
|
||||
name = fields.StringField()
|
||||
|
||||
i = TestItem()
|
||||
assert i.name is None
|
||||
|
||||
i2 = TestItem({'name': 'john doe'})
|
||||
assert i2.name == 'john doe'
|
||||
|
||||
self.assertRaises(TypeError, TestItem, name='john doe')
|
||||
|
||||
self.assertRaises(AttributeError, TestItem, {'name': 'john doe', 'other': 'foo'})
|
||||
|
||||
def test_multi(self):
|
||||
class TestMultiItem(Item):
|
||||
name = StringField()
|
||||
names = MultiValuedField(StringField)
|
||||
name = fields.StringField()
|
||||
names = fields.MultiValuedField(fields.StringField)
|
||||
|
||||
i = TestMultiItem()
|
||||
i.name = 'name'
|
||||
@ -43,14 +57,14 @@ class NewItemTest(unittest.TestCase):
|
||||
|
||||
def test_default_value(self):
|
||||
class TestItem(Item):
|
||||
name = StringField(default='John')
|
||||
name = fields.StringField(default='John')
|
||||
|
||||
i = TestItem()
|
||||
assert i.name == 'John'
|
||||
|
||||
def test_topython_iter(self):
|
||||
def test_to_python_iter(self):
|
||||
class TestItem(Item):
|
||||
name = StringField()
|
||||
name = fields.StringField()
|
||||
|
||||
i = TestItem()
|
||||
i.name = ('John', 'Doe')
|
||||
@ -58,25 +72,31 @@ class NewItemTest(unittest.TestCase):
|
||||
|
||||
def test_repr(self):
|
||||
class TestItem(Item):
|
||||
name = StringField()
|
||||
name = fields.StringField()
|
||||
number = fields.IntegerField()
|
||||
|
||||
i = TestItem()
|
||||
i.name = 'John Doe'
|
||||
assert i.__repr__() == "TestItem({'name': 'John Doe'})"
|
||||
i.number = '123'
|
||||
itemrepr = repr(i)
|
||||
assert itemrepr == "TestItem({'name': 'John Doe', 'number': 123})"
|
||||
|
||||
i2 = eval(itemrepr)
|
||||
assert i2.name == 'John Doe'
|
||||
assert i2.number == 123
|
||||
|
||||
|
||||
class NewItemFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_base_field(self):
|
||||
f = BaseField()
|
||||
f = fields.BaseField()
|
||||
|
||||
assert f.default == None
|
||||
assert f.assign(1) == 1
|
||||
assert f.to_python(1) == 1
|
||||
|
||||
def test_boolean_field(self):
|
||||
class TestItem(Item):
|
||||
field = BooleanField()
|
||||
field = fields.BooleanField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
@ -94,7 +114,7 @@ class NewItemFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_date_field(self):
|
||||
class TestItem(Item):
|
||||
field = DateField()
|
||||
field = fields.DateField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
@ -121,7 +141,7 @@ class NewItemFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_datetime_field(self):
|
||||
class TestItem(Item):
|
||||
field = DateTimeField()
|
||||
field = fields.DateTimeField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
@ -163,7 +183,7 @@ class NewItemFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_decimal_field(self):
|
||||
class TestItem(Item):
|
||||
field = DecimalField()
|
||||
field = fields.DecimalField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
@ -176,11 +196,11 @@ class NewItemFieldsTest(unittest.TestCase):
|
||||
def set_invalid_value():
|
||||
i.field = 'text'
|
||||
|
||||
self.assertRaises(ValueError, set_invalid_value)
|
||||
self.assertRaises(decimal.InvalidOperation, set_invalid_value)
|
||||
|
||||
def test_float_field(self):
|
||||
class TestItem(Item):
|
||||
field = FloatField()
|
||||
field = fields.FloatField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
@ -197,7 +217,7 @@ class NewItemFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_integer_field(self):
|
||||
class TestItem(Item):
|
||||
field = IntegerField()
|
||||
field = fields.IntegerField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
@ -214,7 +234,7 @@ class NewItemFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_string_field(self):
|
||||
class TestItem(Item):
|
||||
field = StringField()
|
||||
field = fields.StringField()
|
||||
|
||||
i = TestItem()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user