Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-26 20:23:53 +00:00)
Modified the default value of the BOT_NAME setting to the project's name.
Modified spider templates to use the already-generated example item instead of a ScrapedItem.

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40771
parent f63a661320
commit 91eff31f18
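The "already-generated example item" mentioned above is the MyItem class created by the project's items template, which the spider templates below now import. The items template itself is not part of this diff; as an assumption, the generated $project_name/items.py would look roughly like this:

# Assumed content of the generated $project_name/items.py (not shown in this diff)
from scrapy.item import ScrapedItem

class MyItem(ScrapedItem):
    # Example item shipped with every new project; the spider templates
    # below import and instantiate it instead of a bare ScrapedItem.
    pass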
@@ -54,6 +54,7 @@ class Command(ScrapyCommand):
     def _genspider(self, name, site, template_file):
         """ Generate spider """
         tvars = {
+            'project_name': settings.get('BOT_NAME'),
             'name': name,
             'site': site,
             'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])
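The tvars dict above supplies the $-style placeholders ($project_name, $name, $site, $classname) used in the template files changed below. Here is a minimal sketch of the rendering step, assuming the templates are expanded with the standard library's string.Template; the actual helper used by _genspider is not shown in this hunk:

from string import Template

def render_spider_template(template_file, dest_file, tvars):
    # Hypothetical helper: read a spider template, substitute the
    # $project_name/$name/$site/$classname placeholders, and write
    # the generated spider module into the project.
    with open(template_file) as f:
        source = Template(f.read()).substitute(tvars)
    with open(dest_file, 'w') as f:
        f.write(source)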
@@ -4,7 +4,7 @@ import $project_name
 # - Scrapy settings for $project_name -
 # ---------------------------------------------------------------------------

-BOT_NAME = 'scrapybot'
+BOT_NAME = '$project_name'
 BOT_VERSION = '1.0'

 SPIDER_MODULES = ['$project_name.spiders']
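For a project created with a hypothetical name of myproject, the generated settings module would now contain BOT_NAME = 'myproject' instead of the shared 'scrapybot' default:

BOT_NAME = 'myproject'
BOT_VERSION = '1.0'

SPIDER_MODULES = ['myproject.spiders']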
@@ -2,9 +2,9 @@
 import re

 from scrapy.xpath import HtmlXPathSelector
-from scrapy.item import ScrapedItem
 from scrapy.link.extractors import RegexLinkExtractor
 from scrapy.contrib.spiders import CrawlSpider, Rule
+from $project_name.items import MyItem

 class $classname(CrawlSpider):
     domain_name = '$site'
@@ -15,10 +15,11 @@ class $classname(CrawlSpider):
     )

     def parse_item(self, response):
+        i = MyItem()
         #xs = HtmlXPathSelector(response)
-        #i = ScrapedItem()
         #i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
         #i.attribute('name', xs.x('//div[@id="name"]'))
         #i.attribute('description', xs.x('//div[@id="description"]'))
         return [i]

 SPIDER = $classname()
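When a user fills in the generated crawl spider, the commented hints above are meant to be uncommented against the project's item; assuming MyItem keeps the same attribute() interface as the old ScrapedItem, a filled-in parse_item would read roughly:

    def parse_item(self, response):
        i = MyItem()
        xs = HtmlXPathSelector(response)
        # Populate the example item from the page (XPaths are the template's placeholders)
        i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
        i.attribute('name', xs.x('//div[@id="name"]'))
        i.attribute('description', xs.x('//div[@id="description"]'))
        return [i]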
@@ -1,5 +1,6 @@
 # -*- coding: utf8 -*-
 from scrapy.contrib.spiders import CSVFeedSpider
+from $project_name.items import MyItem

 class $classname(CSVFeedSpider):
     domain_name = '$site'
@@ -12,12 +13,10 @@ class $classname(CSVFeedSpider):
         # return response

     def parse_row(self, response, row):
-        p = self.create_product(response)
-        #p.attribute('site_id', row['id'])
-        #p.attribute('supplier', self.domain_name)
-        #p.attribute('name', row['name'])
-        #p.attribute('description', row['description'])
-        #p.attribute('image_urls', row['image_link'])
-        return p
+        i = MyItem()
+        #i.attribute('url', row['url'])
+        #i.attribute('name', row['name'])
+        #i.attribute('description', row['description'])
+        return i

 SPIDER = $classname()
@@ -1,20 +1,16 @@
 # -*- coding: utf8 -*-
 from scrapy.contrib.spiders import XMLFeedSpider
+from $project_name.items import MyItem

 class $classname(XMLFeedSpider):
     domain_name = '$site'
     start_urls = ['http://www.$site/feed.xml']

     def parse_item(self, response, xSel):
-        p = self.create_product(response)
-        #p.attribute('url', xSel(''))
-        #p.attribute('supplier', self.domain_name)
-        #p.attribute('site_id', xSel(''))
-        #p.attribute('name', xSel(''))
-        #p.attribute('description', xSel(''))
-        #p.attribute('image_urls', xSel(''))
-        #p.attribute('price', xSel(''))
-        #p.attribute('dimensions', xSel(''))
-        return p
+        i = MyItem()
+        #i.attribute('url', xSel('url'))
+        #i.attribute('name', xSel('name'))
+        #i.attribute('description', xSel('description'))
+        return i

 SPIDER = $classname()
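Rendered for a hypothetical project myproject, spider name example, and site example.com (classname ExampleSpider), the new XML feed template would generate roughly the following module:

# -*- coding: utf8 -*-
from scrapy.contrib.spiders import XMLFeedSpider
from myproject.items import MyItem

class ExampleSpider(XMLFeedSpider):
    domain_name = 'example.com'
    start_urls = ['http://www.example.com/feed.xml']

    def parse_item(self, response, xSel):
        i = MyItem()
        #i.attribute('url', xSel('url'))
        #i.attribute('name', xSel('name'))
        #i.attribute('description', xSel('description'))
        return i

SPIDER = ExampleSpider()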