mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 12:43:57 +00:00
- Fixed a bug in attribute assignment (empty attributes were being set)
- Added GUID setting to FeedSpider --HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40361
This commit is contained in:
parent
9b46c20da2
commit
bd38a312d4
@ -8,6 +8,7 @@ from scrapy.spider import BaseSpider
|
||||
from scrapy.item import ScrapedItem
|
||||
from scrapy.xpath.selector import XmlXPathSelector
|
||||
from scrapy.core.exceptions import UsageError
|
||||
from scrapy.utils.iterators import xmliter, csviter
|
||||
from scrapy.utils.misc import hash_values
|
||||
|
||||
class BasicSpider(BaseSpider):
|
||||
@ -17,7 +18,7 @@ class BasicSpider(BaseSpider):
|
||||
gen_guid_attribs = []
|
||||
|
||||
def set_guid(self, item):
    """Compute a GUID for ``item`` and store it in ``item.guid``.

    The GUID is whatever ``hash_values`` returns for the spider's
    ``domain_name`` followed by the string form of every item attribute
    named in ``self.gen_guid_attribs``. Falsy attribute values contribute
    an empty string.

    ``getattr`` is called without a default, so an attribute listed in
    ``gen_guid_attribs`` but missing from ``item`` raises ``AttributeError``.
    """
    # NOTE(review): domain_name is presumably included so that items with
    # equal attribute values from different spiders get different GUIDs --
    # confirm against hash_values.
    values = [str(getattr(item, aname) or '') for aname in self.gen_guid_attribs]
    item.guid = hash_values(self.domain_name, *values)
|
||||
|
||||
class CrawlSpider(BasicSpider):
|
||||
"""
|
||||
@ -93,6 +94,12 @@ class XMLFeedSpider(BasicSpider):
|
||||
iternodes = True
|
||||
itertag = 'product'
|
||||
|
||||
def parse_item_wrapper(self, response, xSel):
    """Run ``parse_item`` on one node and tag resulting items with a GUID.

    Whatever ``self.parse_item(response, xSel)`` returns is passed back
    unchanged; when that result is a ``ScrapedItem``, ``self.set_guid`` is
    applied to it first.
    """
    result = self.parse_item(response, xSel)
    # Only ScrapedItem instances get a GUID; any other result is passed through.
    if isinstance(result, ScrapedItem):
        self.set_guid(result)
    return result
|
||||
|
||||
def parse(self, response):
|
||||
if not hasattr(self, 'parse_item'):
|
||||
raise NotConfigured('You must define parse_item method in order to scrape this feed')
|
||||
@ -102,5 +109,5 @@ class XMLFeedSpider(BasicSpider):
|
||||
else:
|
||||
nodes = XmlXPathSelector(response).x('//%s' % self.itertag)
|
||||
|
||||
return (self.parse_item(response, xSel) for xSel in nodes)
|
||||
return (self.parse_item_wrapper(response, xSel) for xSel in nodes)
|
||||
|
||||
|
@ -24,17 +24,18 @@ class ScrapedItem(object):
|
||||
|
||||
def attribute(self, attrname, value, override=False, add=False, debug=False):
    """Adapt ``value`` and assign it to the attribute ``attrname``.

    ``value`` is first passed through
    ``self._adaptors_dict.execute(attrname, value, debug)``. If the adapted
    result is falsy, nothing is assigned, so empty values never create or
    overwrite an attribute.

    Otherwise:
      * if the attribute is unset or currently falsy, it is set to the
        adapted value;
      * else if ``override`` is true, the current value is replaced;
      * else if ``add`` is true and both the current and the adapted value
        expose ``__iter__``, the attribute becomes a new list holding the
        current elements followed by the new ones;
      * else the current value is left untouched.

    Returns ``None``.
    """
    val = self._adaptors_dict.execute(attrname, value, debug)
    if not val:
        # Empty adapted values must not be stored.
        return

    curr_val = getattr(self, attrname, None)
    if not curr_val or override:
        setattr(self, attrname, val)
    # NOTE(review): on Python 3 ``str`` defines ``__iter__``, so two strings
    # would also pass this check and be merged character by character --
    # confirm intent.
    elif add and all(hasattr(var, '__iter__') for var in (curr_val, val)):
        setattr(self, attrname, list(curr_val) + list(val))
|
||||
|
||||
def __sub__(self, other):
|
||||
raise NotImplementedError
|
||||
|
Loading…
x
Reference in New Issue
Block a user