mirror of https://github.com/scrapy/scrapy.git

Merge pull request #98 from kalessin/start_requests

This will break any spider that overrides `start_requests` and expects a `list` as the return value.
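
For illustration, a minimal sketch of a spider that this change breaks, and its fix; the spider name, URLs, and the extra request are hypothetical:

```python
from itertools import chain

from scrapy.http import Request
from scrapy.spider import BaseSpider


class ExtraRequestsSpider(BaseSpider):
    """Hypothetical spider that adds one request to the parent's."""
    name = 'extra_requests'
    start_urls = ['http://example.com/']

    # Broken after this change: BaseSpider.start_requests() now returns a
    # generator, and a generator cannot be concatenated to a list with `+`.
    #def start_requests(self):
    #    return BaseSpider.start_requests(self) + [Request('http://example.com/extra')]

    # Working replacement: treat the parent's return value as an iterable.
    def start_requests(self):
        return chain(BaseSpider.start_requests(self),
                     [Request('http://example.com/extra')])
```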

On the other hand:

* The [docs](http://doc.scrapy.org/en/latest/topics/spiders.html#scrapy.spider.BaseSpider.start_requests) say that the return value is an **iterable**, not a list
* Scrapy core already supports consuming the `start_requests` generator on demand, so we can avoid problems like #47
* It allows extensions to change the starting requests from the `spider_opened` signal (see the sketch after this list)
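
A sketch of that last point, assuming the dispatcher-based signal API of this Scrapy version and relying on the engine pulling `start_requests` only after `spider_opened` has fired; the extension name and warm-up URL are hypothetical:

```python
from scrapy.xlib.pydispatch import dispatcher

from scrapy import signals
from scrapy.http import Request


class PrependStartRequests(object):
    """Hypothetical extension that wraps the spider's start requests."""

    def __init__(self):
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)

    def spider_opened(self, spider):
        original = spider.start_requests

        def start_requests():
            # Issue a warm-up request before the spider's own requests.
            yield Request('http://example.com/warmup')
            for request in original():
                yield request

        spider.start_requests = start_requests
```

Because the requests are now consumed lazily, the replacement made at `spider_opened` time takes effect.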
Daniel Graña 2012-03-05 08:51:22 -08:00
commit cc6e297062
2 changed files with 2 additions and 4 deletions

@@ -22,7 +22,7 @@ class SitemapSpider(BaseSpider):
         self._follow = [regex(x) for x in self.sitemap_follow]
 
     def start_requests(self):
-        return [Request(x, callback=self._parse_sitemap) for x in self.sitemap_urls]
+        return (Request(x, callback=self._parse_sitemap) for x in self.sitemap_urls)
 
     def _parse_sitemap(self, response):
         if response.url.endswith('/robots.txt'):

@@ -50,10 +50,8 @@ class BaseSpider(object_ref):
         return self._settings
 
     def start_requests(self):
-        reqs = []
         for url in self.start_urls:
-            reqs.extend(arg_to_iter(self.make_requests_from_url(url)))
-        return reqs
+            yield self.make_requests_from_url(url)
 
     def make_requests_from_url(self, url):
         return Request(url, dont_filter=True)