1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 12:43:57 +00:00

- Fixed a bug in attribute assignment (empty values were being set as attributes)

- Added GUID setting to XMLFeedSpider

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40361
This commit is contained in:
elpolilla 2008-11-04 10:57:59 +00:00
parent 9b46c20da2
commit bd38a312d4
2 changed files with 20 additions and 12 deletions

View File

@ -8,6 +8,7 @@ from scrapy.spider import BaseSpider
from scrapy.item import ScrapedItem
from scrapy.xpath.selector import XmlXPathSelector
from scrapy.core.exceptions import UsageError
from scrapy.utils.iterators import xmliter, csviter
from scrapy.utils.misc import hash_values
class BasicSpider(BaseSpider):
@ -17,7 +18,7 @@ class BasicSpider(BaseSpider):
gen_guid_attribs = []
def set_guid(self, item):
item.guid = hash_values(*[str(getattr(item, aname) or '') for aname in self.gen_guid_attribs])
item.guid = hash_values(self.domain_name, *[str(getattr(item, aname) or '') for aname in self.gen_guid_attribs])
class CrawlSpider(BasicSpider):
"""
@ -93,6 +94,12 @@ class XMLFeedSpider(BasicSpider):
iternodes = True
itertag = 'product'
def parse_item_wrapper(self, response, xSel):
ret = self.parse_item(response, xSel)
if isinstance(ret, ScrapedItem):
self.set_guid(ret)
return ret
def parse(self, response):
if not hasattr(self, 'parse_item'):
raise NotConfigured('You must define parse_item method in order to scrape this feed')
@ -102,5 +109,5 @@ class XMLFeedSpider(BasicSpider):
else:
nodes = XmlXPathSelector(response).x('//%s' % self.itertag)
return (self.parse_item(response, xSel) for xSel in nodes)
return (self.parse_item_wrapper(response, xSel) for xSel in nodes)

View File

@ -24,17 +24,18 @@ class ScrapedItem(object):
def attribute(self, attrname, value, override=False, add=False, debug=False):
val = self._adaptors_dict.execute(attrname, value, debug)
curr_val = getattr(self, attrname, None)
if not curr_val:
setattr(self, attrname, val)
else:
if override:
if val:
curr_val = getattr(self, attrname, None)
if not curr_val:
setattr(self, attrname, val)
elif add and all(hasattr(var, '__iter__') for var in (curr_val, val)):
newval = []
newval.extend(curr_val)
newval.extend(val)
setattr(self, attrname, newval)
else:
if override:
setattr(self, attrname, val)
elif add and all(hasattr(var, '__iter__') for var in (curr_val, val)):
newval = []
newval.extend(curr_val)
newval.extend(val)
setattr(self, attrname, newval)
def __sub__(self, other):
raise NotImplementedError