-Fixed bug in ExtractImages adaptor that made it fail if it received a string

-Removed BasicSpider and its guid generation method because it wasn't generic enough to be in the framework --HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40380
2025-02-23 02:04:25 +00:00 · 2008-11-14 12:25:56 +00:00 · 2008-11-14 12:25:56 +00:00 · c209f214a8
commit c209f214a8
parent f5eb71fb69
2 changed files with 16 additions and 14 deletions
--- a/scrapy/trunk/scrapy/contrib/adaptors/extraction.py
+++ b/scrapy/trunk/scrapy/contrib/adaptors/extraction.py
@@ -43,9 +43,9 @@ def extract_unquoted(locations):

 class ExtractImages(object):
    """
-    This adaptor receives either an XPathSelector containing
-    the desired locations for finding urls, or a list of relative
-    links to be resolved.
+    This adaptor may receive either an XPathSelector containing
+    the desired locations for finding urls, a list of relative
+    links to be resolved, or simply a link (relative or not).

    Input: XPathSelector, XPathSelectorList, iterable
    Output: list of unicodes
@@ -79,6 +79,9 @@ class ExtractImages(object):
        if not self.base_url:
            raise AttributeError('You must specify either a response or a base_url to the ExtractImages adaptor.')
        
+        if isinstance(locations, basestring):
+            locations = [locations]
+
        rel_links = []
        for location in flatten(locations):
            if isinstance(location, (XPathSelector, XPathSelectorList)):
--- a/scrapy/trunk/scrapy/contrib/spiders.py
+++ b/scrapy/trunk/scrapy/contrib/spiders.py
@@ -11,16 +11,7 @@ from scrapy.core.exceptions import UsageError
 from scrapy.utils.iterators import xmliter, csviter
 from scrapy.utils.misc import hash_values

-class BasicSpider(BaseSpider):
-    """
-    This class is basically a BaseSpider with support for GUID generating
-    """
-    gen_guid_attribs = []
-
-    def set_guid(self, item):
-        item.guid = hash_values(self.domain_name, *[str(getattr(item, aname) or '') for aname in self.gen_guid_attribs])
-   
-class CrawlSpider(BasicSpider):
+class CrawlSpider(BaseSpider):
    """
    This class works as a base class for spiders that crawl over websites
    """
@@ -86,7 +77,15 @@ class CrawlSpider(BasicSpider):
                self.set_guid(entry)
        return ret

-class XMLFeedSpider(BasicSpider):
+    def set_guid(self, item):
+        """
+        This method is called whenever the spider returns items, for each item.
+        It should set the 'guid' attribute to the given item with a string that
+        identifies the item uniquely.
+        """
+        raise NotConfigured('You must define set_guid method in order to scrape items.')
+
+class XMLFeedSpider(BaseSpider):
    """
    This class intends to be the base class for spiders that scrape
    from XML feeds.