
more patches sent by Patrick

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40779
Pablo Hoffman 2009-01-26 23:42:51 +00:00
parent c1a1b8945a
commit 4013497edb
2 changed files with 9 additions and 1 deletion


@@ -204,6 +204,7 @@ Ok, done! Let's now sum this up into a spider::
         }
 
     def parse_page(self, response):
+        items = []
         rows = hxs.x('//tr[child::td[@class="prod_attrib"]]')
         for product in rows:
             item = ScrapedItem()
@@ -214,8 +215,9 @@ Ok, done! Let's now sum this up into a spider::
             item.attribute('description', product.x('td[@class="prod_attrib"][3]/text()'))
             item.attribute('weight', product.x('td[@class="prod_attrib"][4]/text()'))
             item.attribute('price', product.x('td[@class="prod_attrib"][5]/text()').re('(\d+)'))
+            items.append(item)
-        return [item]
+        return items
 
     SPIDER = MySpider()
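For context, the tutorial change fixes a real bug: `return [item]` sat after the loop, so the spider returned a one-element list holding only the last product on the page. A minimal sketch of the corrected method, assuming the surrounding tutorial code (the `hxs` selector is built from the response earlier in the tutorial, and the `scrapy.item` import path is inferred from the DEFAULT_ITEM_CLASS setting added in this same commit):

    # Sketch only; uses the 2009-era API shown in the hunk above.
    from scrapy.item import ScrapedItem  # path assumed from DEFAULT_ITEM_CLASS

    def parse_page(self, response):
        items = []  # collect one item per product row, not just the last one
        # hxs: HTML selector built from the response earlier in the tutorial
        rows = hxs.x('//tr[child::td[@class="prod_attrib"]]')
        for product in rows:
            item = ScrapedItem()
            item.attribute('description', product.x('td[@class="prod_attrib"][3]/text()'))
            item.attribute('weight', product.x('td[@class="prod_attrib"][4]/text()'))
            item.attribute('price', product.x('td[@class="prod_attrib"][5]/text()').re('(\d+)'))
            items.append(item)  # previously: return [item] after the loop
        return items  # every scraped item is now returned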


@@ -13,6 +13,12 @@ TEMPLATES_DIR = '%s/templates' % $project_name.__path__[0]
 ENABLED_SPIDERS_FILE = '%s/conf/enabled_spiders.list' % $project_name.__path__[0]
 DEFAULT_ITEM_CLASS = 'scrapy.item.ScrapedItem'
 USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
+
+# The amount of time (in secs) that the downloader should wait before
+# downloading consecutive pages from the same spider. This can be used
+# to throttle the crawling speed to avoid hitting servers too
+# hard. Decimal numbers are supported. Example:
+# DOWNLOAD_DELAY = 2.5
 DOWNLOAD_TIMEOUT = 600
 
 # use this spider class as default when no spider was found for a given url
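The comment block added to the settings template documents the new DOWNLOAD_DELAY option; enabling it in a generated project means uncommenting the line with a chosen value. A hypothetical excerpt from such a project's settings.py (the bot name and values are illustrative; the option names come from this template):

    BOT_NAME = 'mybot'        # illustrative project values
    BOT_VERSION = '1.0'
    USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)

    DOWNLOAD_DELAY = 2.5      # wait 2.5 s between consecutive pages from the
                              # same spider; decimal numbers are supported
    DOWNLOAD_TIMEOUT = 600    # give up on a download after 600 seconds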