1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 19:03:53 +00:00

Creates a usable project structure on startproject. closes #58 and closes #54

--HG--
rename : scrapy/trunk/scrapy/conf/project_template/__init__.py => scrapy/trunk/scrapy/templates/project/module/__init__.py
rename : scrapy/trunk/scrapy/conf/project_template/items.py => scrapy/trunk/scrapy/templates/project/module/items.py
rename : scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py => scrapy/trunk/scrapy/templates/project/module/settings.py
rename : scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py => scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl
rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40819
This commit is contained in:
Daniel Grana 2009-02-05 12:12:15 +00:00
parent bccef8a463
commit f7dac0e449
14 changed files with 57 additions and 23 deletions

View File

@ -3,10 +3,11 @@
tasks"""
import os
import shutil
import string
from optparse import OptionParser
import scrapy
from scrapy.utils.misc import render_templatefile
from scrapy.utils.misc import render_templatefile, string_camelcase
usage = """
scrapy-admin.py [options] [command]
@ -17,8 +18,15 @@ Available commands:
Starts a new project with name 'project_name'
"""
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
# This is the list of templatefile's path that are rendered *after copying* to
# project directory.
TEMPLATES = (
'scrapy_settings.py',
'scrapy-ctl.py',
'${project_name}/settings.py',
'${project_name}/items.py',
'${project_name}/pipelines.py',
)
def main():
@ -33,10 +41,20 @@ def main():
if cmd == "startproject":
if len(args) >= 2:
project_name = args[1]
project_tplpath = os.path.join(scrapy.__path__[0], "conf", "project_template")
shutil.copytree(project_tplpath, project_name)
project_root_path = project_name
project_module_path = '%s/%s' % (project_name, project_name)
roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
shutil.copytree(roottpl, project_name)
moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
shutil.copytree(moduletpl, '%s/%s' % (project_name, project_name))
for path in TEMPLATES:
render_templatefile(os.path.join(project_name, path), project_name=project_name)
tplfile = os.path.join(project_root_path,
string.Template(path).substitute(project_name=project_name))
render_templatefile(tplfile, project_name=project_name,
ProjectName=string_camelcase(project_name))
else:
print "scrapy-admin.py: missing project name"
else:

View File

@ -7,7 +7,8 @@ import shutil
from scrapy.spider import spiders
from scrapy.command import ScrapyCommand
from scrapy.conf import settings
from scrapy.utils.misc import render_templatefile
from scrapy.utils.misc import render_templatefile, string_camelcase
class Command(ScrapyCommand):
""" Childs can define custom tvars """
@ -54,7 +55,8 @@ class Command(ScrapyCommand):
def _genspider(self, name, site, template_file):
""" Generate spider """
tvars = {
'project_name': settings.get('BOT_NAME'),
'project_name': settings.get('PROJECT_NAME'),
'ProjectName': string_camelcase(settings.get('PROJECT_NAME')),
'name': name,
'site': site,
'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')])

View File

@ -1,4 +0,0 @@
#!/usr/bin/env python
from scrapy.command.cmdline import execute
execute()

View File

@ -2,5 +2,5 @@
from scrapy.item import ScrapedItem
class MyItem(ScrapedItem):
class ${ProjectName}Item(ScrapedItem):
pass

View File

@ -0,0 +1,5 @@
# Define your item pipelines here
class ${ProjectName}Pipeline(object):
def process_item(self, domain, item):
return item

View File

@ -3,8 +3,9 @@ import $project_name
# ---------------------------------------------------------------------------
# - Scrapy settings for $project_name -
# ---------------------------------------------------------------------------
PROJECT_NAME = '$project_name'
BOT_NAME = '$project_name'
BOT_NAME = PROJECT_NAME
BOT_VERSION = '1.0'
SPIDER_MODULES = ['$project_name.spiders']
@ -79,9 +80,9 @@ SPIDER_MIDDLEWARES = (
# Spider side
)
# Item pipelines are usually configured by commands (see conf/commands)
#ITEM_PIPELINES = (
#)
ITEM_PIPELINES = (
'${project_name}.pipelines.${ProjectName}Pipeline',
)
#DEPTH_LIMIT = 10 # limit the maximum link depth to follow
#DEPTH_STATS = 1 # enable depth stats

View File

@ -4,7 +4,7 @@ import re
from scrapy.xpath import HtmlXPathSelector
from scrapy.link.extractors import RegexLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from $project_name.items import MyItem
from $project_name.items import ${ProjectName}Item
class $classname(CrawlSpider):
domain_name = '$site'
@ -15,7 +15,7 @@ class $classname(CrawlSpider):
)
def parse_item(self, response):
i = MyItem()
i = ${ProjectName}Item()
#xs = HtmlXPathSelector(response)
#i.attribute('site_id', xs.x('//input[@id="sid"]/@value'))
#i.attribute('name', xs.x('//div[@id="name"]'))

View File

@ -1,6 +1,6 @@
# -*- coding: utf8 -*-
from scrapy.contrib.spiders import CSVFeedSpider
from $project_name.items import MyItem
from $project_name.items import ${ProjectName}Item
class $classname(CSVFeedSpider):
domain_name = '$site'
@ -13,7 +13,7 @@ class $classname(CSVFeedSpider):
# return response
def parse_row(self, response, row):
i = MyItem()
i = ${ProjectName}Item()
#i.attribute('url', row['url'])
#i.attribute('name', row['name'])
#i.attribute('description', row['description'])

View File

@ -1,13 +1,13 @@
# -*- coding: utf8 -*-
from scrapy.contrib.spiders import XMLFeedSpider
from $project_name.items import MyItem
from $project_name.items import ${ProjectName}Item
class $classname(XMLFeedSpider):
domain_name = '$site'
start_urls = ['http://www.$site/feed.xml']
def parse_item(self, response, xSel):
i = MyItem()
i = ${ProjectName}Item()
#i.attribute('url', xSel('url'))
#i.attribute('name', xSel('name'))
#i.attribute('description', xSel('description'))

View File

@ -0,0 +1,7 @@
#!/usr/bin/env python
import os
os.environ.setdefault('SCRAPYSETTINGS_MODULE', '${project_name}.settings')
from scrapy.command.cmdline import execute
execute()

View File

@ -153,3 +153,8 @@ def items_to_csv(file, items, delimiter=';', headers=None):
value = unicode_to_str(value) if isinstance(value, basestring) else value
row.append(value)
csv_file.writerow(row)
CAMELCASE_INVALID_CHARS = re.compile('[^a-zA-Z]')
def string_camelcase(string):
return CAMELCASE_INVALID_CHARS.sub('', string.title())