diff --git a/scrapy/trunk/docs/topics/adaptors.rst b/scrapy/trunk/docs/topics/adaptors.rst
index 96306ed33..f89b619a7 100644
--- a/scrapy/trunk/docs/topics/adaptors.rst
+++ b/scrapy/trunk/docs/topics/adaptors.rst
@@ -204,6 +204,7 @@ Ok, done! Let's now sum this up into a spider::
         }
 
     def parse_page(self, response):
+        items = []
         rows = hxs.x('//tr[child::td[@class="prod_attrib"]]')
         for product in rows:
             item = ScrapedItem()
@@ -214,8 +215,9 @@ Ok, done! Let's now sum this up into a spider::
             item.attribute('description', product.x('td[@class="prod_attrib"][3]/text()'))
             item.attribute('weight', product.x('td[@class="prod_attrib"][4]/text()'))
             item.attribute('price', product.x('td[@class="prod_attrib"][5]/text()').re('(\d+)'))
+            items.append(item)
 
-        return [item]
+        return items
 
 SPIDER = MySpider()
 
diff --git a/scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py b/scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py
index b60eddfe5..4b88158f4 100644
--- a/scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py
+++ b/scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py
@@ -13,6 +13,12 @@ TEMPLATES_DIR = '%s/templates' % $project_name.__path__[0]
 ENABLED_SPIDERS_FILE = '%s/conf/enabled_spiders.list' % $project_name.__path__[0]
 DEFAULT_ITEM_CLASS = 'scrapy.item.ScrapedItem'
 USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
+
+# The amount of time (in secs) that the downloader should wait before
+# downloading consecutive pages from the same spider. This can be used
+# to throttle the crawling speed to avoid hitting servers too
+# hard. Decimal numbers are supported. Example:
+# DOWNLOAD_DELAY = 2.5
 DOWNLOAD_TIMEOUT = 600
 
 # use this spider class as default when no spider was found for a given url
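
The first hunk above fixes a classic accumulation bug in the docs example: an item is built on every loop iteration, but `return [item]` after the loop returns only the last one. Below is a minimal, framework-free Python sketch of the before/after behavior; `parse_rows_buggy`, `parse_rows_fixed`, and the dict stand-in for `ScrapedItem` are hypothetical names used only for illustration, not part of the patch or the Scrapy API.

    # Buggy shape: `item` is rebound each iteration, so the list built
    # after the loop contains only the item from the final row.
    def parse_rows_buggy(rows):
        for row in rows:
            item = {'name': row}   # stand-in for ScrapedItem()
        return [item]              # only the last row survives

    # Fixed shape, mirroring the patch: initialize an accumulator before
    # the loop, append each item inside it, and return the full list.
    def parse_rows_fixed(rows):
        items = []
        for row in rows:
            item = {'name': row}
            items.append(item)
        return items               # every row is returned

    assert parse_rows_buggy(['a', 'b', 'c']) == [{'name': 'c'}]
    assert parse_rows_fixed(['a', 'b', 'c']) == [
        {'name': 'a'}, {'name': 'b'}, {'name': 'c'}]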