mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 10:43:48 +00:00
119 lines
2.9 KiB
Plaintext
119 lines
2.9 KiB
Plaintext
|
= SEP-005: Detailed !ItemBuilder API use =
|
||
|
|
||
|
[[PageOutline(2-5,Contents)]]
|
||
|
|
||
|
||'''SEP:'''||5||
|
||
|
||'''Title:'''||!ItemBuilder API||
|
||
|
||'''Author:'''||Ismael Carnales, Pablo Hoffman||
|
||
|
||'''Created:'''||2009-07-24||
|
||
|
||'''Status:'''||Obsoleted by [wiki:SEP-008]||
|
||
|
|
||
|
Item class for examples:
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class NewsItem(Item):
|
||
|
url = fields.TextField()
|
||
|
headline = fields.TextField()
|
||
|
content = fields.TextField()
|
||
|
published = fields.DateField()
|
||
|
}}}
|
||
|
|
||
|
== Setting expanders ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class NewsItemBuilder(ItemBuilder):
|
||
|
item_class = NewsItem
|
||
|
|
||
|
headline = reducers.Reducer(extract, remove_tags(), unquote(), strip)
|
||
|
}}}
|
||
|
|
||
|
This approach will override the Reducer class for !BuilderFields depending on their Item Field class:
|
||
|
|
||
|
* !MultivaluedField = PassValue
|
||
|
* !TextField = JoinStrings
|
||
|
* other = TakeFirst
|
||
|
|
||
|
== Setting reducers ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class NewsItemBuilder(ItemBuilder):
|
||
|
item_class = NewsItem
|
||
|
|
||
|
headline = reducers.TakeFirst(extract, remove_tags(), unquote(), strip)
|
||
|
published = reducers.Reducer(extract, remove_tags(), unquote(), strip)
|
||
|
}}}
|
||
|
|
||
|
As with the previous example, this would select join_strings as the reducer for content.
|
||
|
|
||
|
== Setting expanders/reducers the new way ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class NewsItemBuilder(ItemBuilder):
|
||
|
item_class = NewsItem
|
||
|
|
||
|
headline = BuilderField(extract, remove_tags(), unquote(), strip)
|
||
|
content = BuilderField(extract, remove_tags(), unquote(), strip)
|
||
|
|
||
|
class Reducer:
|
||
|
headline = TakeFirst
|
||
|
}}}
|
||
|
|
||
|
== Extending !ItemBuilder ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class SiteNewsItemBuilder(NewsItemBuilder):
|
||
|
published = reducers.Reducer(extract, remove_tags(), unquote(), strip, to_date('%d.%m.%Y'))
|
||
|
}}}
|
||
|
|
||
|
== Extending !ItemBuilder using static methods ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class SiteNewsItemBuilder(NewsItemBuilder):
|
||
|
published = reducers.Reducer(NewsItemBuilder.published, to_date('%d.%m.%Y'))
|
||
|
}}}
|
||
|
|
||
|
== Using default_builder ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class DefaultedNewsItemBuilder(ItemBuilder):
|
||
|
item_class = NewsItem
|
||
|
|
||
|
default_builder = reducers.Reducer(extract, remove_tags(), unquote(), strip)
|
||
|
}}}
|
||
|
|
||
|
This will use default_builder as the builder for every field in the item class.
|
||
|
As a reducer is not set, reducers will be set based on Item Field classes.
|
||
|
|
||
|
== Reset default_builder for a field ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class DefaultedNewsItemBuilder(ItemBuilder):
|
||
|
item_class = NewsItem
|
||
|
|
||
|
default_builder = reducers.Reducer(extract, remove_tags(), unquote(), strip)
|
||
|
url = BuilderField()
|
||
|
}}}
|
||
|
|
||
|
== Extending default !ItemBuilder ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class SiteNewsItemBuilder(NewsItemBuilder):
|
||
|
published = reducers.Reducer(extract, remove_tags(), unquote(), strip, to_date('%d.%m.%Y'))
|
||
|
}}}
|
||
|
|
||
|
== Extending default !ItemBuilder using static methods ==
|
||
|
|
||
|
{{{
|
||
|
#!python
|
||
|
class SiteNewsItemBuilder(NewsItemBuilder):
|
||
|
published = reducers.Reducer(NewsItemBuilder.default_builder, to_date('%d.%m.%Y'))
|
||
|
}}}
|