mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 13:23:59 +00:00
Removed support for default values in Scrapy items, which have proven confusing in the past
This commit is contained in:
parent
503f302010
commit
951ba507f9
@ -65,8 +65,8 @@ Defining our Item
|
||||
=================
|
||||
|
||||
`Items` are containers that will be loaded with the scraped data; they work
|
||||
like simple python dicts but they offer some additional features like providing
|
||||
default values.
|
||||
like simple python dicts but provide additional protection against populating
|
||||
undeclared fields, to prevent typos.
|
||||
|
||||
They are declared by creating an :class:`scrapy.item.Item` class and defining
|
||||
its attributes as :class:`scrapy.item.Field` objects, like you will in an ORM
|
||||
|
@ -30,8 +30,8 @@ objects. Here is an example::
|
||||
class Product(Item):
|
||||
name = Field()
|
||||
price = Field()
|
||||
stock = Field(default=0)
|
||||
last_updated = Field()
|
||||
stock = Field()
|
||||
last_updated = Field(serializer=str)
|
||||
|
||||
.. note:: Those familiar with `Django`_ will notice that Scrapy Items are
|
||||
declared similarly to `Django Models`_, except that Scrapy Items are much
|
||||
@ -46,8 +46,8 @@ Item Fields
|
||||
===========
|
||||
|
||||
:class:`Field` objects are used to specify metadata for each field. For
|
||||
example, the default value for the ``stock`` field illustrated in the example
|
||||
above.
|
||||
example, the serializer function for the ``last_updated`` field illustrated in
|
||||
the example above.
|
||||
|
||||
You can specify any kind of metadata for each field. There is no restriction on
|
||||
the values accepted by :class:`Field` objects. For this same
|
||||
@ -95,10 +95,7 @@ Getting field values
|
||||
>>> product['price']
|
||||
1000
|
||||
|
||||
>>> product['stock'] # getting field with default value
|
||||
0
|
||||
|
||||
>>> product['last_updated'] # getting field with no default value
|
||||
>>> product['last_updated']
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
KeyError: 'last_updated'
|
||||
@ -175,28 +172,6 @@ Creating items from dicts::
|
||||
...
|
||||
KeyError: 'Product does not support field: lala'
|
||||
|
||||
Default values
|
||||
==============
|
||||
|
||||
The only field metadata key supported by Items themselves is ``default``, which
|
||||
specifies the default value to return when trying to access a field which
|
||||
wasn't populated before.
|
||||
|
||||
So, for the ``Product`` item declared above::
|
||||
|
||||
>>> product = Product()
|
||||
|
||||
>>> product['stock'] # field with default value
|
||||
0
|
||||
|
||||
>>> product['name'] # field with no default value
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
KeyError: 'name'
|
||||
|
||||
>>> product.get('name') is None
|
||||
True
|
||||
|
||||
Extending Items
|
||||
===============
|
||||
|
||||
@ -206,16 +181,16 @@ fields) by declaring a subclass of your original Item.
|
||||
For example::
|
||||
|
||||
class DiscountedProduct(Product):
|
||||
discount_percent = Field(default=0)
|
||||
discount_percent = Field(serializer=str)
|
||||
discount_expiration_date = Field()
|
||||
|
||||
You can also extend field metadata by using the previous field metadata and
|
||||
appending more values, or changing existing values, like this::
|
||||
|
||||
class SpecificProduct(Product):
|
||||
name = Field(Product.fields['name'], default='product')
|
||||
name = Field(Product.fields['name'], serializer=my_serializer)
|
||||
|
||||
That adds (or replaces) the ``default`` metadata key for the ``name`` field,
|
||||
That adds (or replaces) the ``serializer`` metadata key for the ``name`` field,
|
||||
keeping all the previously existing metadata values.
|
||||
|
||||
Item objects
|
||||
|
@ -45,13 +45,7 @@ class DictItem(DictMixin, BaseItem):
|
||||
self[k] = v
|
||||
|
||||
def __getitem__(self, key):
|
||||
try:
|
||||
return self._values[key]
|
||||
except KeyError:
|
||||
field = self.fields[key]
|
||||
if 'default' in field:
|
||||
return field['default']
|
||||
raise
|
||||
return self._values[key]
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if key in self.fields:
|
||||
|
@ -25,7 +25,7 @@ class NewFieldPersonItem(BasePersonItem):
|
||||
|
||||
|
||||
class OverrideFieldPersonItem(BasePersonItem):
|
||||
age = Field(default=1)
|
||||
age = Field()
|
||||
|
||||
|
||||
class DjangoItemTest(unittest.TestCase):
|
||||
@ -45,7 +45,6 @@ class DjangoItemTest(unittest.TestCase):
|
||||
def test_override_field(self):
|
||||
i = OverrideFieldPersonItem()
|
||||
self.assertEqual(i.fields.keys(), ['age', 'name'])
|
||||
self.assertEqual(i.fields['age'], {'default': 1})
|
||||
|
||||
def test_save(self):
|
||||
i = BasePersonItem()
|
||||
@ -65,5 +64,4 @@ class DjangoItemTest(unittest.TestCase):
|
||||
person = i.save(commit=False)
|
||||
|
||||
self.assertEqual(person.name, 'John')
|
||||
self.assertEqual(person.age, 1)
|
||||
|
||||
|
@ -40,13 +40,6 @@ class ItemTest(unittest.TestCase):
|
||||
self.assertRaises(KeyError, i.__setitem__, 'field', 'text')
|
||||
self.assertRaises(KeyError, i.__getitem__, 'field')
|
||||
|
||||
def test_default_value(self):
|
||||
class TestItem(Item):
|
||||
name = Field(default=u'John')
|
||||
|
||||
i = TestItem()
|
||||
self.assertEqual(i['name'], u'John')
|
||||
|
||||
def test_repr(self):
|
||||
class TestItem(Item):
|
||||
name = Field()
|
||||
|
Loading…
x
Reference in New Issue
Block a user