mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 02:04:25 +00:00
-Fixed bug in ExtractImages adaptor that made it fail if it received a string
-Removed BasicSpider and its guid generation method because it wasn't generic enough to be in the framework --HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40380
This commit is contained in:
parent
f5eb71fb69
commit
c209f214a8
@ -43,9 +43,9 @@ def extract_unquoted(locations):
|
||||
|
||||
class ExtractImages(object):
|
||||
"""
|
||||
This adaptor receives either an XPathSelector containing
|
||||
the desired locations for finding urls, or a list of relative
|
||||
links to be resolved.
|
||||
This adaptor may receive either an XPathSelector containing
|
||||
the desired locations for finding urls, a list of relative
|
||||
links to be resolved, or simply a link (relative or not).
|
||||
|
||||
Input: XPathSelector, XPathSelectorList, iterable
|
||||
Output: list of unicodes
|
||||
@ -79,6 +79,9 @@ class ExtractImages(object):
|
||||
if not self.base_url:
|
||||
raise AttributeError('You must specify either a response or a base_url to the ExtractImages adaptor.')
|
||||
|
||||
if isinstance(locations, basestring):
|
||||
locations = [locations]
|
||||
|
||||
rel_links = []
|
||||
for location in flatten(locations):
|
||||
if isinstance(location, (XPathSelector, XPathSelectorList)):
|
||||
|
@ -11,16 +11,7 @@ from scrapy.core.exceptions import UsageError
|
||||
from scrapy.utils.iterators import xmliter, csviter
|
||||
from scrapy.utils.misc import hash_values
|
||||
|
||||
class BasicSpider(BaseSpider):
|
||||
"""
|
||||
This class is basically a BaseSpider with support for GUID generating
|
||||
"""
|
||||
gen_guid_attribs = []
|
||||
|
||||
def set_guid(self, item):
|
||||
item.guid = hash_values(self.domain_name, *[str(getattr(item, aname) or '') for aname in self.gen_guid_attribs])
|
||||
|
||||
class CrawlSpider(BasicSpider):
|
||||
class CrawlSpider(BaseSpider):
|
||||
"""
|
||||
This class works as a base class for spiders that crawl over websites
|
||||
"""
|
||||
@ -86,7 +77,15 @@ class CrawlSpider(BasicSpider):
|
||||
self.set_guid(entry)
|
||||
return ret
|
||||
|
||||
class XMLFeedSpider(BasicSpider):
|
||||
def set_guid(self, item):
|
||||
"""
|
||||
This method is called whenever the spider returns items, for each item.
|
||||
It should set the 'guid' attribute to the given item with a string that
|
||||
identifies the item uniquely.
|
||||
"""
|
||||
raise NotConfigured('You must define set_guid method in order to scrape items.')
|
||||
|
||||
class XMLFeedSpider(BaseSpider):
|
||||
"""
|
||||
This class intends to be the base class for spiders that scrape
|
||||
from XML feeds.
|
||||
|
Loading…
x
Reference in New Issue
Block a user