mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 14:44:08 +00:00
--HG-- rename : scrapy/trunk/scrapy/conf/project_template/__init__.py => scrapy/trunk/scrapy/templates/project/module/__init__.py rename : scrapy/trunk/scrapy/conf/project_template/items.py => scrapy/trunk/scrapy/templates/project/module/items.py rename : scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py => scrapy/trunk/scrapy/templates/project/module/settings.py rename : scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py => scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40819
This commit is contained in:
parent
bccef8a463
commit
f7dac0e449
@ -3,10 +3,11 @@
|
||||
tasks"""
|
||||
import os
|
||||
import shutil
|
||||
import string
|
||||
from optparse import OptionParser
|
||||
|
||||
import scrapy
|
||||
from scrapy.utils.misc import render_templatefile
|
||||
from scrapy.utils.misc import render_templatefile, string_camelcase
|
||||
|
||||
usage = """
|
||||
scrapy-admin.py [options] [command]
|
||||
@ -17,14 +18,21 @@ Available commands:
|
||||
Starts a new project with name 'project_name'
|
||||
"""
|
||||
|
||||
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
|
||||
|
||||
# This is the list of template file paths that are rendered *after copying* to
|
||||
# the project directory.
|
||||
TEMPLATES = (
|
||||
'scrapy_settings.py',
|
||||
'scrapy-ctl.py',
|
||||
'${project_name}/settings.py',
|
||||
'${project_name}/items.py',
|
||||
'${project_name}/pipelines.py',
|
||||
)
|
||||
|
||||
def main():
|
||||
parser = OptionParser(usage=usage)
|
||||
opts, args = parser.parse_args()
|
||||
|
||||
|
||||
if not args:
|
||||
parser.print_help()
|
||||
return
|
||||
@ -33,10 +41,20 @@ def main():
|
||||
if cmd == "startproject":
|
||||
if len(args) >= 2:
|
||||
project_name = args[1]
|
||||
project_tplpath = os.path.join(scrapy.__path__[0], "conf", "project_template")
|
||||
shutil.copytree(project_tplpath, project_name)
|
||||
project_root_path = project_name
|
||||
project_module_path = '%s/%s' % (project_name, project_name)
|
||||
|
||||
roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
|
||||
shutil.copytree(roottpl, project_name)
|
||||
|
||||
moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
|
||||
shutil.copytree(moduletpl, '%s/%s' % (project_name, project_name))
|
||||
|
||||
for path in TEMPLATES:
|
||||
render_templatefile(os.path.join(project_name, path), project_name=project_name)
|
||||
tplfile = os.path.join(project_root_path,
|
||||
string.Template(path).substitute(project_name=project_name))
|
||||
render_templatefile(tplfile, project_name=project_name,
|
||||
ProjectName=string_camelcase(project_name))
|
||||
else:
|
||||
print "scrapy-admin.py: missing project name"
|
||||
else:
|
||||
|
@ -7,7 +7,8 @@ import shutil
|
||||
from scrapy.spider import spiders
|
||||
from scrapy.command import ScrapyCommand
|
||||
from scrapy.conf import settings
|
||||
from scrapy.utils.misc import render_templatefile
|
||||
from scrapy.utils.misc import render_templatefile, string_camelcase
|
||||
|
||||
|
||||
class Command(ScrapyCommand):
|
||||
""" Subclasses can define custom tvars """
|
||||
@ -54,7 +55,8 @@ class Command(ScrapyCommand):
|
||||
def _genspider(self, name, site, template_file):
|
||||
""" Generate spider """
|
||||
tvars = {
|
||||
'project_name': settings.get('BOT_NAME'),
|
||||
'project_name': settings.get('PROJECT_NAME'),
|
||||
'ProjectName': string_camelcase(settings.get('PROJECT_NAME')),
|
||||
'name': name,
|
||||
'site': site,
|
||||
'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])
|
||||
|
@ -1,4 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from scrapy.command.cmdline import execute
|
||||
execute()
|
@ -2,5 +2,5 @@
|
||||
|
||||
from scrapy.item import ScrapedItem
|
||||
|
||||
class MyItem(ScrapedItem):
|
||||
class ${ProjectName}Item(ScrapedItem):
|
||||
pass
|
@ -0,0 +1,5 @@
|
||||
# Define your item pipelines here
|
||||
|
||||
class ${ProjectName}Pipeline(object):
|
||||
def process_item(self, domain, item):
|
||||
return item
|
@ -3,8 +3,9 @@ import $project_name
|
||||
# ---------------------------------------------------------------------------
|
||||
# - Scrapy settings for $project_name -
|
||||
# ---------------------------------------------------------------------------
|
||||
PROJECT_NAME = '$project_name'
|
||||
|
||||
BOT_NAME = '$project_name'
|
||||
BOT_NAME = PROJECT_NAME
|
||||
BOT_VERSION = '1.0'
|
||||
|
||||
SPIDER_MODULES = ['$project_name.spiders']
|
||||
@ -79,9 +80,9 @@ SPIDER_MIDDLEWARES = (
|
||||
# Spider side
|
||||
)
|
||||
|
||||
# Item pipelines are usually configured by commands (see conf/commands)
|
||||
#ITEM_PIPELINES = (
|
||||
#)
|
||||
ITEM_PIPELINES = (
|
||||
'${project_name}.pipelines.${ProjectName}Pipeline',
|
||||
)
|
||||
|
||||
#DEPTH_LIMIT = 10 # limit the maximum link depth to follow
|
||||
#DEPTH_STATS = 1 # enable depth stats
|
@ -4,7 +4,7 @@ import re
|
||||
from scrapy.xpath import HtmlXPathSelector
|
||||
from scrapy.link.extractors import RegexLinkExtractor
|
||||
from scrapy.contrib.spiders import CrawlSpider, Rule
|
||||
from $project_name.items import MyItem
|
||||
from $project_name.items import ${ProjectName}Item
|
||||
|
||||
class $classname(CrawlSpider):
|
||||
domain_name = '$site'
|
||||
@ -15,7 +15,7 @@ class $classname(CrawlSpider):
|
||||
)
|
||||
|
||||
def parse_item(self, response):
|
||||
i = MyItem()
|
||||
i = ${ProjectName}Item()
|
||||
#xs = HtmlXPathSelector(response)
|
||||
#i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
|
||||
#i.attribute('name', xs.x('//div[@id="name"]'))
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf8 -*-
|
||||
from scrapy.contrib.spiders import CSVFeedSpider
|
||||
from $project_name.items import MyItem
|
||||
from $project_name.items import ${ProjectName}Item
|
||||
|
||||
class $classname(CSVFeedSpider):
|
||||
domain_name = '$site'
|
||||
@ -13,7 +13,7 @@ class $classname(CSVFeedSpider):
|
||||
# return response
|
||||
|
||||
def parse_row(self, response, row):
|
||||
i = MyItem()
|
||||
i = ${ProjectName}Item()
|
||||
#i.attribute('url', row['url'])
|
||||
#i.attribute('name', row['name'])
|
||||
#i.attribute('description', row['description'])
|
@ -1,13 +1,13 @@
|
||||
# -*- coding: utf8 -*-
|
||||
from scrapy.contrib.spiders import XMLFeedSpider
|
||||
from $project_name.items import MyItem
|
||||
from $project_name.items import ${ProjectName}Item
|
||||
|
||||
class $classname(XMLFeedSpider):
|
||||
domain_name = '$site'
|
||||
start_urls = ['http://www.$site/feed.xml']
|
||||
|
||||
def parse_item(self, response, xSel):
|
||||
i = MyItem()
|
||||
i = ${ProjectName}Item()
|
||||
#i.attribute('url', xSel('url'))
|
||||
#i.attribute('name', xSel('name'))
|
||||
#i.attribute('description', xSel('description'))
|
7
scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py
Executable file
7
scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py
Executable file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
os.environ.setdefault('SCRAPYSETTINGS_MODULE', '${project_name}.settings')
|
||||
|
||||
from scrapy.command.cmdline import execute
|
||||
execute()
|
@ -153,3 +153,8 @@ def items_to_csv(file, items, delimiter=';', headers=None):
|
||||
value = unicode_to_str(value) if isinstance(value, basestring) else value
|
||||
row.append(value)
|
||||
csv_file.writerow(row)
|
||||
|
||||
|
||||
# Characters that are dropped when building a CamelCase name. ASCII digits are
# kept (they are legal inside Python identifiers), so that names which differ
# only by a number, e.g. 'shop1' and 'shop2', do not collapse to the same
# result.
CAMELCASE_INVALID_CHARS = re.compile('[^a-zA-Z0-9]')


def string_camelcase(string):
    """Return the CamelCase version of `string`.

    The input is title-cased with str.title() (each run of letters gets an
    upper-case first letter and lower-case remainder) and then every character
    that is not an ASCII letter or digit is removed.

    >>> string_camelcase('my_project')
    'MyProject'
    >>> string_camelcase('lost-pound')
    'LostPound'
    >>> string_camelcase('project2')
    'Project2'
    """
    return CAMELCASE_INVALID_CHARS.sub('', string.title())
|
||||
|
Loading…
x
Reference in New Issue
Block a user