1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 12:43:57 +00:00

- Fixed bug in attribute assignment (empty attributes being set)

- Added GUID setting to FeedSpider

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40361
This commit is contained in:
elpolilla 2008-11-04 10:57:59 +00:00
parent 9b46c20da2
commit bd38a312d4
2 changed files with 20 additions and 12 deletions

View File

@ -8,6 +8,7 @@ from scrapy.spider import BaseSpider
from scrapy.item import ScrapedItem from scrapy.item import ScrapedItem
from scrapy.xpath.selector import XmlXPathSelector from scrapy.xpath.selector import XmlXPathSelector
from scrapy.core.exceptions import UsageError from scrapy.core.exceptions import UsageError
from scrapy.utils.iterators import xmliter, csviter
from scrapy.utils.misc import hash_values from scrapy.utils.misc import hash_values
class BasicSpider(BaseSpider): class BasicSpider(BaseSpider):
@ -17,7 +18,7 @@ class BasicSpider(BaseSpider):
gen_guid_attribs = [] gen_guid_attribs = []
def set_guid(self, item): def set_guid(self, item):
item.guid = hash_values(*[str(getattr(item, aname) or '') for aname in self.gen_guid_attribs]) item.guid = hash_values(self.domain_name, *[str(getattr(item, aname) or '') for aname in self.gen_guid_attribs])
class CrawlSpider(BasicSpider): class CrawlSpider(BasicSpider):
""" """
@ -93,6 +94,12 @@ class XMLFeedSpider(BasicSpider):
iternodes = True iternodes = True
itertag = 'product' itertag = 'product'
def parse_item_wrapper(self, response, xSel):
ret = self.parse_item(response, xSel)
if isinstance(ret, ScrapedItem):
self.set_guid(ret)
return ret
def parse(self, response): def parse(self, response):
if not hasattr(self, 'parse_item'): if not hasattr(self, 'parse_item'):
raise NotConfigured('You must define parse_item method in order to scrape this feed') raise NotConfigured('You must define parse_item method in order to scrape this feed')
@ -102,5 +109,5 @@ class XMLFeedSpider(BasicSpider):
else: else:
nodes = XmlXPathSelector(response).x('//%s' % self.itertag) nodes = XmlXPathSelector(response).x('//%s' % self.itertag)
return (self.parse_item(response, xSel) for xSel in nodes) return (self.parse_item_wrapper(response, xSel) for xSel in nodes)

View File

@ -24,17 +24,18 @@ class ScrapedItem(object):
def attribute(self, attrname, value, override=False, add=False, debug=False): def attribute(self, attrname, value, override=False, add=False, debug=False):
val = self._adaptors_dict.execute(attrname, value, debug) val = self._adaptors_dict.execute(attrname, value, debug)
curr_val = getattr(self, attrname, None) if val:
if not curr_val: curr_val = getattr(self, attrname, None)
setattr(self, attrname, val) if not curr_val:
else:
if override:
setattr(self, attrname, val) setattr(self, attrname, val)
elif add and all(hasattr(var, '__iter__') for var in (curr_val, val)): else:
newval = [] if override:
newval.extend(curr_val) setattr(self, attrname, val)
newval.extend(val) elif add and all(hasattr(var, '__iter__') for var in (curr_val, val)):
setattr(self, attrname, newval) newval = []
newval.extend(curr_val)
newval.extend(val)
setattr(self, attrname, newval)
def __sub__(self, other): def __sub__(self, other):
raise NotImplementedError raise NotImplementedError