1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 17:24:38 +00:00

- Modified the default value of the BOT_NAME setting to the project's name.

- Modified the spider templates to use the already-generated example item (MyItem) instead of a ScrapedItem.

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40771
This commit is contained in:
elpolilla 2009-01-26 15:24:52 +00:00
parent f63a661320
commit 91eff31f18
5 changed files with 17 additions and 20 deletions

View File

@ -54,6 +54,7 @@ class Command(ScrapyCommand):
def _genspider(self, name, site, template_file):
""" Generate spider """
tvars = {
'project_name': settings.get('BOT_NAME'),
'name': name,
'site': site,
'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])

View File

@ -4,7 +4,7 @@ import $project_name
# - Scrapy settings for $project_name -
# ---------------------------------------------------------------------------
BOT_NAME = 'scrapybot'
BOT_NAME = '$project_name'
BOT_VERSION = '1.0'
SPIDER_MODULES = ['$project_name.spiders']

View File

@ -2,9 +2,9 @@
import re
from scrapy.xpath import HtmlXPathSelector
from scrapy.item import ScrapedItem
from scrapy.link.extractors import RegexLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from $project_name.items import MyItem
class $classname(CrawlSpider):
domain_name = '$site'
@ -15,10 +15,11 @@ class $classname(CrawlSpider):
)
def parse_item(self, response):
i = MyItem()
#xs = HtmlXPathSelector(response)
#i = ScrapedItem()
#i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
#i.attribute('name', xs.x('//div[@id="name"]'))
#i.attribute('description', xs.x('//div[@id="description"]'))
return [i]
SPIDER = $classname()

View File

@ -1,5 +1,6 @@
# -*- coding: utf8 -*-
from scrapy.contrib.spiders import CSVFeedSpider
from $project_name.items import MyItem
class $classname(CSVFeedSpider):
domain_name = '$site'
@ -12,12 +13,10 @@ class $classname(CSVFeedSpider):
# return response
def parse_row(self, response, row):
p = self.create_product(response)
#p.attribute('site_id', row['id'])
#p.attribute('supplier', self.domain_name)
#p.attribute('name', row['name'])
#p.attribute('description', row['description'])
#p.attribute('image_urls', row['image_link'])
return p
i = MyItem()
#i.attribute('url', row['url'])
#i.attribute('name', row['name'])
#i.attribute('description', row['description'])
return i
SPIDER = $classname()

View File

@ -1,20 +1,16 @@
# -*- coding: utf8 -*-
from scrapy.contrib.spiders import XMLFeedSpider
from $project_name.items import MyItem
class $classname(XMLFeedSpider):
domain_name = '$site'
start_urls = ['http://www.$site/feed.xml']
def parse_item(self, response, xSel):
p = self.create_product(response)
#p.attribute('url', xSel(''))
#p.attribute('supplier', self.domain_name)
#p.attribute('site_id', xSel(''))
#p.attribute('name', xSel(''))
#p.attribute('description', xSel(''))
#p.attribute('image_urls', xSel(''))
#p.attribute('price', xSel(''))
#p.attribute('dimensions', xSel(''))
return p
i = MyItem()
#i.attribute('url', xSel('url'))
#i.attribute('name', xSel('name'))
#i.attribute('description', xSel('description'))
return i
SPIDER = $classname()