1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 22:04:01 +00:00

Creates a usable project structure on startproject. closes #58 and closes #54

--HG--
rename : scrapy/trunk/scrapy/conf/project_template/__init__.py => scrapy/trunk/scrapy/templates/project/module/__init__.py
rename : scrapy/trunk/scrapy/conf/project_template/items.py => scrapy/trunk/scrapy/templates/project/module/items.py
rename : scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py => scrapy/trunk/scrapy/templates/project/module/settings.py
rename : scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py => scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40819
This commit is contained in:
Daniel Grana 2009-02-05 12:12:15 +00:00
parent bccef8a463
commit f7dac0e449
14 changed files with 57 additions and 23 deletions

View File

@ -3,10 +3,11 @@
tasks""" tasks"""
import os import os
import shutil import shutil
import string
from optparse import OptionParser from optparse import OptionParser
import scrapy import scrapy
from scrapy.utils.misc import render_templatefile from scrapy.utils.misc import render_templatefile, string_camelcase
usage = """ usage = """
scrapy-admin.py [options] [command] scrapy-admin.py [options] [command]
@ -17,8 +18,15 @@ Available commands:
Starts a new project with name 'project_name' Starts a new project with name 'project_name'
""" """
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
# This is the list of templatefile's path that are rendered *after copying* to
# project directory.
TEMPLATES = ( TEMPLATES = (
'scrapy_settings.py', 'scrapy-ctl.py',
'${project_name}/settings.py',
'${project_name}/items.py',
'${project_name}/pipelines.py',
) )
def main(): def main():
@ -33,10 +41,20 @@ def main():
if cmd == "startproject": if cmd == "startproject":
if len(args) >= 2: if len(args) >= 2:
project_name = args[1] project_name = args[1]
project_tplpath = os.path.join(scrapy.__path__[0], "conf", "project_template") project_root_path = project_name
shutil.copytree(project_tplpath, project_name) project_module_path = '%s/%s' % (project_name, project_name)
roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
shutil.copytree(roottpl, project_name)
moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
shutil.copytree(moduletpl, '%s/%s' % (project_name, project_name))
for path in TEMPLATES: for path in TEMPLATES:
render_templatefile(os.path.join(project_name, path), project_name=project_name) tplfile = os.path.join(project_root_path,
string.Template(path).substitute(project_name=project_name))
render_templatefile(tplfile, project_name=project_name,
ProjectName=string_camelcase(project_name))
else: else:
print "scrapy-admin.py: missing project name" print "scrapy-admin.py: missing project name"
else: else:

View File

@ -7,7 +7,8 @@ import shutil
from scrapy.spider import spiders from scrapy.spider import spiders
from scrapy.command import ScrapyCommand from scrapy.command import ScrapyCommand
from scrapy.conf import settings from scrapy.conf import settings
from scrapy.utils.misc import render_templatefile from scrapy.utils.misc import render_templatefile, string_camelcase
class Command(ScrapyCommand): class Command(ScrapyCommand):
""" Childs can define custom tvars """ """ Childs can define custom tvars """
@ -54,7 +55,8 @@ class Command(ScrapyCommand):
def _genspider(self, name, site, template_file): def _genspider(self, name, site, template_file):
""" Generate spider """ """ Generate spider """
tvars = { tvars = {
'project_name': settings.get('BOT_NAME'), 'project_name': settings.get('PROJECT_NAME'),
'ProjectName': string_camelcase(settings.get('PROJECT_NAME')),
'name': name, 'name': name,
'site': site, 'site': site,
'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')]) 'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])

View File

@ -1,4 +0,0 @@
#!/usr/bin/env python
from scrapy.command.cmdline import execute
execute()

View File

@ -2,5 +2,5 @@
from scrapy.item import ScrapedItem from scrapy.item import ScrapedItem
class MyItem(ScrapedItem): class ${ProjectName}Item(ScrapedItem):
pass pass

View File

@ -0,0 +1,5 @@
# Define yours item pipelines here
class ${ProjectName}Pipeline(object):
def process_item(self, domain, item):
return item

View File

@ -3,8 +3,9 @@ import $project_name
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# - Scrapy settings for $project_name - # - Scrapy settings for $project_name -
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
PROJECT_NAME = '$project_name'
BOT_NAME = '$project_name' BOT_NAME = PROJECT_NAME
BOT_VERSION = '1.0' BOT_VERSION = '1.0'
SPIDER_MODULES = ['$project_name.spiders'] SPIDER_MODULES = ['$project_name.spiders']
@ -79,9 +80,9 @@ SPIDER_MIDDLEWARES = (
# Spider side # Spider side
) )
# Item pipelines are usually configured by commands (see conf/commands) ITEM_PIPELINES = (
#ITEM_PIPELINES = ( '${project_name}.pipelines.${ProjectName}Pipeline',
#) )
#DEPTH_LIMIT = 10 # limit the maximum link depth to follow #DEPTH_LIMIT = 10 # limit the maximum link depth to follow
#DEPTH_STATS = 1 # enable depth stats #DEPTH_STATS = 1 # enable depth stats

View File

@ -4,7 +4,7 @@ import re
from scrapy.xpath import HtmlXPathSelector from scrapy.xpath import HtmlXPathSelector
from scrapy.link.extractors import RegexLinkExtractor from scrapy.link.extractors import RegexLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule from scrapy.contrib.spiders import CrawlSpider, Rule
from $project_name.items import MyItem from $project_name.items import ${ProjectName}Item
class $classname(CrawlSpider): class $classname(CrawlSpider):
domain_name = '$site' domain_name = '$site'
@ -15,7 +15,7 @@ class $classname(CrawlSpider):
) )
def parse_item(self, response): def parse_item(self, response):
i = MyItem() i = ${ProjectName}Item()
#xs = HtmlXPathSelector(response) #xs = HtmlXPathSelector(response)
#i.attribute('site_id', xs.x('//input[@id="sid"]/@value')) #i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
#i.attribute('name', xs.x('//div[@id="name"]')) #i.attribute('name', xs.x('//div[@id="name"]'))

View File

@ -1,6 +1,6 @@
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
from scrapy.contrib.spiders import CSVFeedSpider from scrapy.contrib.spiders import CSVFeedSpider
from $project_name.items import MyItem from $project_name.items import ${ProjectName}Item
class $classname(CSVFeedSpider): class $classname(CSVFeedSpider):
domain_name = '$site' domain_name = '$site'
@ -13,7 +13,7 @@ class $classname(CSVFeedSpider):
# return response # return response
def parse_row(self, response, row): def parse_row(self, response, row):
i = MyItem() i = ${ProjectName}Item()
#i.attribute('url', row['url']) #i.attribute('url', row['url'])
#i.attribute('name', row['name']) #i.attribute('name', row['name'])
#i.attribute('description', row['description']) #i.attribute('description', row['description'])

View File

@ -1,13 +1,13 @@
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
from scrapy.contrib.spiders import XMLFeedSpider from scrapy.contrib.spiders import XMLFeedSpider
from $project_name.items import MyItem from $project_name.items import ${ProjectName}Item
class $classname(XMLFeedSpider): class $classname(XMLFeedSpider):
domain_name = '$site' domain_name = '$site'
start_urls = ['http://www.$site/feed.xml'] start_urls = ['http://www.$site/feed.xml']
def parse_item(self, response, xSel): def parse_item(self, response, xSel):
i = MyItem() i = ${ProjectName}Item()
#i.attribute('url', xSel('url')) #i.attribute('url', xSel('url'))
#i.attribute('name', xSel('name')) #i.attribute('name', xSel('name'))
#i.attribute('description', xSel('description')) #i.attribute('description', xSel('description'))

View File

@ -0,0 +1,7 @@
#!/usr/bin/env python
import os
os.environ.setdefault('SCRAPYSETTINGS_MODULE', '${project_name}.settings')
from scrapy.command.cmdline import execute
execute()

View File

@ -153,3 +153,8 @@ def items_to_csv(file, items, delimiter=';', headers=None):
value = unicode_to_str(value) if isinstance(value, basestring) else value value = unicode_to_str(value) if isinstance(value, basestring) else value
row.append(value) row.append(value)
csv_file.writerow(row) csv_file.writerow(row)
CAMELCASE_INVALID_CHARS = re.compile('[^a-zA-Z]')
def string_camelcase(string):
return CAMELCASE_INVALID_CHARS.sub('', string.title())