1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 02:04:25 +00:00

-Fixed bug in ExtractImages adaptor that made it fail if it received a string

-Removed BasicSpider and its guid generation method because it wasn't generic enough to be in the framework

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40380
This commit is contained in:
elpolilla 2008-11-14 12:25:56 +00:00
parent f5eb71fb69
commit c209f214a8
2 changed files with 16 additions and 14 deletions

View File

@ -43,9 +43,9 @@ def extract_unquoted(locations):
class ExtractImages(object):
"""
This adaptor receives either an XPathSelector containing
the desired locations for finding urls, or a list of relative
links to be resolved.
This adaptor may receive either an XPathSelector containing
the desired locations for finding urls, a list of relative
links to be resolved, or simply a link (relative or not).
Input: XPathSelector, XPathSelectorList, iterable
Output: list of unicodes
@ -79,6 +79,9 @@ class ExtractImages(object):
if not self.base_url:
raise AttributeError('You must specify either a response or a base_url to the ExtractImages adaptor.')
if isinstance(locations, basestring):
locations = [locations]
rel_links = []
for location in flatten(locations):
if isinstance(location, (XPathSelector, XPathSelectorList)):

View File

@ -11,16 +11,7 @@ from scrapy.core.exceptions import UsageError
from scrapy.utils.iterators import xmliter, csviter
from scrapy.utils.misc import hash_values
class BasicSpider(BaseSpider):
    """
    A BaseSpider that can generate a GUID for the items it scrapes.

    The GUID is derived from the spider's domain name together with the
    values of the item attributes named in ``gen_guid_attribs``.
    """

    # Names of the item attributes whose values feed the GUID hash.
    gen_guid_attribs = []

    def set_guid(self, item):
        """
        Compute a GUID for ``item`` and store it in ``item.guid``.

        Each attribute listed in ``gen_guid_attribs`` is read from the item;
        falsy values are replaced by an empty string and everything else is
        converted with ``str``. The resulting strings, preceded by
        ``self.domain_name``, are passed to ``hash_values``.
        """
        parts = []
        for attr_name in self.gen_guid_attribs:
            # Falsy values (None, '', 0, ...) all contribute an empty string.
            parts.append(str(getattr(item, attr_name) or ''))
        item.guid = hash_values(self.domain_name, *parts)
class CrawlSpider(BasicSpider):
class CrawlSpider(BaseSpider):
"""
This class works as a base class for spiders that crawl over websites
"""
@ -86,7 +77,15 @@ class CrawlSpider(BasicSpider):
self.set_guid(entry)
return ret
class XMLFeedSpider(BasicSpider):
def set_guid(self, item):
    """
    Hook invoked for each item the spider returns.

    Implementations are expected to assign to the item's 'guid' attribute
    a string that uniquely identifies that item. This default version does
    not set anything: it always raises NotConfigured, so the method has to
    be overridden before items can be scraped.
    """
    message = 'You must define set_guid method in order to scrape items.'
    raise NotConfigured(message)
class XMLFeedSpider(BaseSpider):
"""
This class intends to be the base class for spiders that scrape
from XML feeds.