mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-21 05:53:15 +00:00
Use Response.follow_all in the documentation where appropriate
This commit is contained in:
parent
6fa8f06b64
commit
dd12f5fdcd
@ -625,12 +625,12 @@ attribute automatically. So the code can be shortened further::
|
||||
for a in response.css('li.next a'):
|
||||
yield response.follow(a, callback=self.parse)
|
||||
|
||||
.. note::
|
||||
To create multiple requests from an iterable, you can use
|
||||
:meth:`response.follow_all <scrapy.http.TextResponse.follow_all>` instead::
|
||||
|
||||
links = response.css('li.next a')
|
||||
yield from response.follow_all(links, callback=self.parse)
|
||||
|
||||
``response.follow(response.css('li.next a'))`` is not valid because
|
||||
``response.css`` returns a list-like object with selectors for all results,
|
||||
not a single selector. A ``for`` loop like in the example above, or
|
||||
``response.follow(response.css('li.next a')[0])`` is fine.
|
||||
|
||||
More examples and patterns
|
||||
--------------------------
|
||||
@ -647,13 +647,11 @@ this time for scraping author information::
|
||||
start_urls = ['http://quotes.toscrape.com/']
|
||||
|
||||
def parse(self, response):
|
||||
# follow links to author pages
|
||||
for href in response.css('.author + a::attr(href)'):
|
||||
yield response.follow(href, self.parse_author)
|
||||
author_page_links = response.css('.author + a')
|
||||
yield from response.follow_all(author_page_links, self.parse_author)
|
||||
|
||||
# follow pagination links
|
||||
for href in response.css('li.next a::attr(href)'):
|
||||
yield response.follow(href, self.parse)
|
||||
pagination_links = response.css('li.next a')
|
||||
yield from response.follow_all(pagination_links, self.parse)
|
||||
|
||||
def parse_author(self, response):
|
||||
def extract_with_css(query):
|
||||
@ -669,8 +667,10 @@ This spider will start from the main page, it will follow all the links to the
|
||||
authors pages calling the ``parse_author`` callback for each of them, and also
|
||||
the pagination links with the ``parse`` callback as we saw before.
|
||||
|
||||
Here we're passing callbacks to ``response.follow`` as positional arguments
|
||||
to make the code shorter; it also works for ``scrapy.Request``.
|
||||
Here we're passing callbacks to
|
||||
:meth:`response.follow_all <scrapy.http.TextResponse.follow_all>` as positional
|
||||
arguments to make the code shorter; it also works for
|
||||
:class:`~scrapy.http.Request`.
|
||||
|
||||
The ``parse_author`` callback defines a helper function to extract and cleanup the
|
||||
data from a CSS query and yields the Python dict with the author data.
|
||||
|
Loading…
x
Reference in New Issue
Block a user