Renamed LinkExtractors extract_urls method to extract_links
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40594
This commit is contained in:
parent c82c799d07
commit 91a23e61bf
@@ -85,7 +85,7 @@ class CrawlSpider(BaseSpider):
         requests = []
         seen = set()
         for rule in self._rules:
-            links = [l for l in rule.link_extractor.extract_urls(response) if l not in seen]
+            links = [l for l in rule.link_extractor.extract_links(response) if l not in seen]
             if links and rule.process_links:
                 links = rule.process_links(links)
             seen = seen.union(links)
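This is the only call-site change in CrawlSpider: each rule's link_extractor is now asked for extract_links instead of extract_urls. Any object exposing that one method satisfies the loop above; a minimal duck-typed sketch (DummyLinkExtractor is a made-up name, and Link objects are assumed to be constructible as in the test hunks below):

# Hypothetical stand-in: the CrawlSpider loop above only requires an
# extract_links(response) method returning Link objects.
class DummyLinkExtractor(object):
    def __init__(self, links):
        self._links = links              # pre-baked Link objects for the sketch

    def extract_links(self, response):
        return list(self._links)         # real extractors parse the response body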
@@ -7,7 +7,7 @@ from scrapy.utils.url import urljoin_rfc as urljoin
 
 class LinkExtractor(FixedSGMLParser):
     """LinkExtractor are used to extract links from web pages. They are
-    instantiated and later "applied" to a Response using the extract_urls
+    instantiated and later "applied" to a Response using the extract_links
     method which must receive a Response object and return a dict whoose keys
     are the (absolute) urls to follow, and its values any arbitrary data. In
     this case the values are the text of the hyperlink.
@@ -16,7 +16,7 @@ class LinkExtractor(FixedSGMLParser):
     functionality for extracting links to follow, but you could override this
     class or create a new one if you need some additional functionality. The
     only requisite is that the new (or overrided) class must provide a
-    extract_urls method that receives a Response and returns a dict with the
+    extract_links method that receives a Response and returns a dict with the
     links to follow as its keys.
 
     The constructor arguments are:
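Note that the docstring still describes a dict-of-urls return value while the test hunks below exercise a list of Link objects; only the method name changes in this commit. A hedged usage sketch against the interface the tests show (the HTML body here is invented; the Response and LinkExtractor signatures are copied from the test hunks):

# Sketch only: Response(domain, url, body=...) mirrors the test fixtures;
# the anchor tag and URLs are illustrative, not taken from the diff.
html = '<a href="item/12.html">Item 12</a>'
response = Response("example.org", "http://example.org/somepage/index.html", body=html)

lx = LinkExtractor()                     # default: tag=a, attr=href
for link in lx.extract_links(response):
    print link.url, link.text            # Python 2, matching the codebase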
@@ -35,7 +35,7 @@ class LinkExtractor(FixedSGMLParser):
         self.scan_attr = attr if callable(attr) else lambda a: a == attr
         self.current_link = None
 
-    def extract_urls(self, response, unique=False):
+    def extract_links(self, response, unique=False):
         self.reset()
         self.unique = unique
         self.feed(response.body.to_string())
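The base method keeps unique=False as its default, while RegexLinkExtractor in the next hunk flips that default to unique=True; callers can always pass the flag explicitly. A small sketch, with lx and response set up as in the example above:

# Passing unique explicitly overrides either class's default.
deduped = lx.extract_links(response, unique=True)
everything = lx.extract_links(response, unique=False)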
@@ -54,11 +54,11 @@ class RegexLinkExtractor(LinkExtractor):
         attr_func = lambda x: x in attrs
         LinkExtractor.__init__(self, tag=tag_func, attr=attr_func)
 
-    def extract_urls(self, response, unique=True):
+    def extract_links(self, response, unique=True):
         if self.restrict_xpaths:
             response = new_response_from_xpaths(response, self.restrict_xpaths)
 
-        links = LinkExtractor.extract_urls(self, response, unique)
+        links = LinkExtractor.extract_links(self, response, unique)
         links = [link for link in links if _is_valid_url(link.url)]
 
         if self.allow_res:
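RegexLinkExtractor demonstrates the override pattern the docstring promises: delegate to the parent, then post-filter the resulting links. A hypothetical subclass following the same shape (PdfOnlyLinkExtractor is invented for illustration; only LinkExtractor.extract_links and link.url come from the diff):

# Made-up subclass mirroring the delegate-then-filter pattern above.
class PdfOnlyLinkExtractor(LinkExtractor):
    def extract_links(self, response, unique=True):
        links = LinkExtractor.extract_links(self, response, unique)
        return [link for link in links if link.url.endswith('.pdf')]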
@@ -17,7 +17,7 @@ class LinkExtractorTestCase(unittest.TestCase):
         response = Response("example.org", "http://example.org/somepage/index.html", body=html)
 
         lx = LinkExtractor()  # default: tag=a, attr=href
-        self.assertEqual(lx.extract_urls(response),
+        self.assertEqual(lx.extract_links(response),
                          [Link(url='http://example.org/somepage/item/12.html', text='Item 12'),
                           Link(url='http://example.org/about.html', text='About us'),
                           Link(url='http://example.org/othercat.html', text='Other category'),
@@ -30,7 +30,7 @@ class LinkExtractorTestCase(unittest.TestCase):
         response = Response("example.org", "http://example.org/somepage/index.html", body=html)
 
         lx = LinkExtractor()  # default: tag=a, attr=href
-        self.assertEqual(lx.extract_urls(response),
+        self.assertEqual(lx.extract_links(response),
                          [Link(url='http://otherdomain.com/base/item/12.html', text='Item 12')])
 
     def test_matches(self):
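This second test expects absolute URLs on otherdomain.com even though the response was fetched from example.org, which is consistent with the fixture HTML declaring a base href that the extractor resolves relative URLs against (the base tag itself is not visible in this diff). The resolution step, approximated with the stdlib:

# Approximation using Python 2's stdlib; the codebase actually uses
# scrapy.utils.url.urljoin_rfc (see the import in the second hunk).
from urlparse import urljoin
print urljoin('http://otherdomain.com/base/', 'item/12.html')
# -> http://otherdomain.com/base/item/12.html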
@@ -22,7 +22,7 @@ class TestSpider(BaseSpider):
     def parse(self, response):
         xlink = LinkExtractor()
         itemre = re.compile(self.itemurl_re)
-        for link in xlink.extract_urls(response):
+        for link in xlink.extract_links(response):
             if itemre.search(link.url):
                 yield Request(url=link.url, callback=self.parse_item)
 
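The parse method above spells out by hand the extract-filter-request pattern that the CrawlSpider rules in the first hunk automate. The same shape as a hypothetical reusable helper (follow_matching is invented; the Request, LinkExtractor, and re usage are taken from the hunk):

# Invented helper generalizing the loop above: extract, regex-filter, request.
def follow_matching(response, pattern, callback):
    lx = LinkExtractor()
    regex = re.compile(pattern)
    for link in lx.extract_links(response):
        if regex.search(link.url):
            yield Request(url=link.url, callback=callback)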