mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 22:04:01 +00:00
--HG-- rename : scrapy/trunk/scrapy/conf/project_template/__init__.py => scrapy/trunk/scrapy/templates/project/module/__init__.py rename : scrapy/trunk/scrapy/conf/project_template/items.py => scrapy/trunk/scrapy/templates/project/module/items.py rename : scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py => scrapy/trunk/scrapy/templates/project/module/settings.py rename : scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py => scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40819
This commit is contained in:
parent
bccef8a463
commit
f7dac0e449
@ -3,10 +3,11 @@
|
|||||||
tasks"""
|
tasks"""
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import string
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
|
|
||||||
import scrapy
|
import scrapy
|
||||||
from scrapy.utils.misc import render_templatefile
|
from scrapy.utils.misc import render_templatefile, string_camelcase
|
||||||
|
|
||||||
usage = """
|
usage = """
|
||||||
scrapy-admin.py [options] [command]
|
scrapy-admin.py [options] [command]
|
||||||
@ -17,8 +18,15 @@ Available commands:
|
|||||||
Starts a new project with name 'project_name'
|
Starts a new project with name 'project_name'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
|
||||||
|
|
||||||
|
# This is the list of template file paths that are rendered *after copying* to the
|
||||||
|
# project directory.
|
||||||
TEMPLATES = (
|
TEMPLATES = (
|
||||||
'scrapy_settings.py',
|
'scrapy-ctl.py',
|
||||||
|
'${project_name}/settings.py',
|
||||||
|
'${project_name}/items.py',
|
||||||
|
'${project_name}/pipelines.py',
|
||||||
)
|
)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -33,10 +41,20 @@ def main():
|
|||||||
if cmd == "startproject":
|
if cmd == "startproject":
|
||||||
if len(args) >= 2:
|
if len(args) >= 2:
|
||||||
project_name = args[1]
|
project_name = args[1]
|
||||||
project_tplpath = os.path.join(scrapy.__path__[0], "conf", "project_template")
|
project_root_path = project_name
|
||||||
shutil.copytree(project_tplpath, project_name)
|
project_module_path = '%s/%s' % (project_name, project_name)
|
||||||
|
|
||||||
|
roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
|
||||||
|
shutil.copytree(roottpl, project_name)
|
||||||
|
|
||||||
|
moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
|
||||||
|
shutil.copytree(moduletpl, '%s/%s' % (project_name, project_name))
|
||||||
|
|
||||||
for path in TEMPLATES:
|
for path in TEMPLATES:
|
||||||
render_templatefile(os.path.join(project_name, path), project_name=project_name)
|
tplfile = os.path.join(project_root_path,
|
||||||
|
string.Template(path).substitute(project_name=project_name))
|
||||||
|
render_templatefile(tplfile, project_name=project_name,
|
||||||
|
ProjectName=string_camelcase(project_name))
|
||||||
else:
|
else:
|
||||||
print "scrapy-admin.py: missing project name"
|
print "scrapy-admin.py: missing project name"
|
||||||
else:
|
else:
|
||||||
|
@ -7,7 +7,8 @@ import shutil
|
|||||||
from scrapy.spider import spiders
|
from scrapy.spider import spiders
|
||||||
from scrapy.command import ScrapyCommand
|
from scrapy.command import ScrapyCommand
|
||||||
from scrapy.conf import settings
|
from scrapy.conf import settings
|
||||||
from scrapy.utils.misc import render_templatefile
|
from scrapy.utils.misc import render_templatefile, string_camelcase
|
||||||
|
|
||||||
|
|
||||||
class Command(ScrapyCommand):
|
class Command(ScrapyCommand):
|
||||||
""" Childs can define custom tvars """
|
""" Childs can define custom tvars """
|
||||||
@ -54,7 +55,8 @@ class Command(ScrapyCommand):
|
|||||||
def _genspider(self, name, site, template_file):
|
def _genspider(self, name, site, template_file):
|
||||||
""" Generate spider """
|
""" Generate spider """
|
||||||
tvars = {
|
tvars = {
|
||||||
'project_name': settings.get('BOT_NAME'),
|
'project_name': settings.get('PROJECT_NAME'),
|
||||||
|
'ProjectName': string_camelcase(settings.get('PROJECT_NAME')),
|
||||||
'name': name,
|
'name': name,
|
||||||
'site': site,
|
'site': site,
|
||||||
'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])
|
'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])
|
||||||
|
@ -1,4 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from scrapy.command.cmdline import execute
|
|
||||||
execute()
|
|
@ -2,5 +2,5 @@
|
|||||||
|
|
||||||
from scrapy.item import ScrapedItem
|
from scrapy.item import ScrapedItem
|
||||||
|
|
||||||
class MyItem(ScrapedItem):
|
class ${ProjectName}Item(ScrapedItem):
|
||||||
pass
|
pass
|
@ -0,0 +1,5 @@
|
|||||||
|
# Define your item pipelines here
|
||||||
|
|
||||||
|
class ${ProjectName}Pipeline(object):
|
||||||
|
def process_item(self, domain, item):
|
||||||
|
return item
|
@ -3,8 +3,9 @@ import $project_name
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# - Scrapy settings for $project_name -
|
# - Scrapy settings for $project_name -
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
PROJECT_NAME = '$project_name'
|
||||||
|
|
||||||
BOT_NAME = '$project_name'
|
BOT_NAME = PROJECT_NAME
|
||||||
BOT_VERSION = '1.0'
|
BOT_VERSION = '1.0'
|
||||||
|
|
||||||
SPIDER_MODULES = ['$project_name.spiders']
|
SPIDER_MODULES = ['$project_name.spiders']
|
||||||
@ -79,9 +80,9 @@ SPIDER_MIDDLEWARES = (
|
|||||||
# Spider side
|
# Spider side
|
||||||
)
|
)
|
||||||
|
|
||||||
# Item pipelines are usually configured by commands (see conf/commands)
|
ITEM_PIPELINES = (
|
||||||
#ITEM_PIPELINES = (
|
'${project_name}.pipelines.${ProjectName}Pipeline',
|
||||||
#)
|
)
|
||||||
|
|
||||||
#DEPTH_LIMIT = 10 # limit the maximum link depth to follow
|
#DEPTH_LIMIT = 10 # limit the maximum link depth to follow
|
||||||
#DEPTH_STATS = 1 # enable depth stats
|
#DEPTH_STATS = 1 # enable depth stats
|
@ -4,7 +4,7 @@ import re
|
|||||||
from scrapy.xpath import HtmlXPathSelector
|
from scrapy.xpath import HtmlXPathSelector
|
||||||
from scrapy.link.extractors import RegexLinkExtractor
|
from scrapy.link.extractors import RegexLinkExtractor
|
||||||
from scrapy.contrib.spiders import CrawlSpider, Rule
|
from scrapy.contrib.spiders import CrawlSpider, Rule
|
||||||
from $project_name.items import MyItem
|
from $project_name.items import ${ProjectName}Item
|
||||||
|
|
||||||
class $classname(CrawlSpider):
|
class $classname(CrawlSpider):
|
||||||
domain_name = '$site'
|
domain_name = '$site'
|
||||||
@ -15,7 +15,7 @@ class $classname(CrawlSpider):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def parse_item(self, response):
|
def parse_item(self, response):
|
||||||
i = MyItem()
|
i = ${ProjectName}Item()
|
||||||
#xs = HtmlXPathSelector(response)
|
#xs = HtmlXPathSelector(response)
|
||||||
#i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
|
#i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
|
||||||
#i.attribute('name', xs.x('//div[@id="name"]'))
|
#i.attribute('name', xs.x('//div[@id="name"]'))
|
@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf8 -*-
|
# -*- coding: utf8 -*-
|
||||||
from scrapy.contrib.spiders import CSVFeedSpider
|
from scrapy.contrib.spiders import CSVFeedSpider
|
||||||
from $project_name.items import MyItem
|
from $project_name.items import ${ProjectName}Item
|
||||||
|
|
||||||
class $classname(CSVFeedSpider):
|
class $classname(CSVFeedSpider):
|
||||||
domain_name = '$site'
|
domain_name = '$site'
|
||||||
@ -13,7 +13,7 @@ class $classname(CSVFeedSpider):
|
|||||||
# return response
|
# return response
|
||||||
|
|
||||||
def parse_row(self, response, row):
|
def parse_row(self, response, row):
|
||||||
i = MyItem()
|
i = ${ProjectName}Item()
|
||||||
#i.attribute('url', row['url'])
|
#i.attribute('url', row['url'])
|
||||||
#i.attribute('name', row['name'])
|
#i.attribute('name', row['name'])
|
||||||
#i.attribute('description', row['description'])
|
#i.attribute('description', row['description'])
|
@ -1,13 +1,13 @@
|
|||||||
# -*- coding: utf8 -*-
|
# -*- coding: utf8 -*-
|
||||||
from scrapy.contrib.spiders import XMLFeedSpider
|
from scrapy.contrib.spiders import XMLFeedSpider
|
||||||
from $project_name.items import MyItem
|
from $project_name.items import ${ProjectName}Item
|
||||||
|
|
||||||
class $classname(XMLFeedSpider):
|
class $classname(XMLFeedSpider):
|
||||||
domain_name = '$site'
|
domain_name = '$site'
|
||||||
start_urls = ['http://www.$site/feed.xml']
|
start_urls = ['http://www.$site/feed.xml']
|
||||||
|
|
||||||
def parse_item(self, response, xSel):
|
def parse_item(self, response, xSel):
|
||||||
i = MyItem()
|
i = ${ProjectName}Item()
|
||||||
#i.attribute('url', xSel('url'))
|
#i.attribute('url', xSel('url'))
|
||||||
#i.attribute('name', xSel('name'))
|
#i.attribute('name', xSel('name'))
|
||||||
#i.attribute('description', xSel('description'))
|
#i.attribute('description', xSel('description'))
|
7
scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py
Executable file
7
scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
os.environ.setdefault('SCRAPYSETTINGS_MODULE', '${project_name}.settings')
|
||||||
|
|
||||||
|
from scrapy.command.cmdline import execute
|
||||||
|
execute()
|
@ -153,3 +153,8 @@ def items_to_csv(file, items, delimiter=';', headers=None):
|
|||||||
value = unicode_to_str(value) if isinstance(value, basestring) else value
|
value = unicode_to_str(value) if isinstance(value, basestring) else value
|
||||||
row.append(value)
|
row.append(value)
|
||||||
csv_file.writerow(row)
|
csv_file.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
CAMELCASE_INVALID_CHARS = re.compile('[^a-zA-Z]')
|
||||||
|
def string_camelcase(string):
|
||||||
|
return CAMELCASE_INVALID_CHARS.sub('', string.title())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user