From f7dac0e449abc1f07cc6e5786a9d3f105ef1d143 Mon Sep 17 00:00:00 2001 From: Daniel Grana Date: Thu, 5 Feb 2009 12:12:15 +0000 Subject: [PATCH] Creates a usable project structure on startproject. closes #58 and closes #54 --HG-- rename : scrapy/trunk/scrapy/conf/project_template/__init__.py => scrapy/trunk/scrapy/templates/project/module/__init__.py rename : scrapy/trunk/scrapy/conf/project_template/items.py => scrapy/trunk/scrapy/templates/project/module/items.py rename : scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py => scrapy/trunk/scrapy/templates/project/module/settings.py rename : scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py => scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl rename : scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl => scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40819 --- scrapy/trunk/scrapy/bin/scrapy-admin.py | 30 +++++++++++++++---- .../scrapy/command/commands/genspider.py | 6 ++-- .../conf/project_template/scrapy-ctl.py | 4 --- .../project/module}/__init__.py | 0 .../project/module}/items.py | 2 +- .../templates/project/module/pipelines.py | 5 ++++ .../project/module/settings.py} | 9 +++--- .../project/module}/spiders/__init__.py | 0 .../module}/templates/spider_basic.tmpl | 0 .../module}/templates/spider_crawl.tmpl | 4 +-- .../module}/templates/spider_csvfeed.tmpl | 4 +-- 
.../module}/templates/spider_xmlfeed.tmpl | 4 +-- .../templates/project/root/scrapy-ctl.py | 7 +++++ scrapy/trunk/scrapy/utils/misc.py | 5 ++++ 14 files changed, 57 insertions(+), 23 deletions(-) delete mode 100755 scrapy/trunk/scrapy/conf/project_template/scrapy-ctl.py rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/__init__.py (100%) rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/items.py (70%) create mode 100644 scrapy/trunk/scrapy/templates/project/module/pipelines.py rename scrapy/trunk/scrapy/{conf/project_template/scrapy_settings.py => templates/project/module/settings.py} (96%) rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/spiders/__init__.py (100%) rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/templates/spider_basic.tmpl (100%) rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/templates/spider_crawl.tmpl (89%) rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/templates/spider_csvfeed.tmpl (87%) rename scrapy/trunk/scrapy/{conf/project_template => templates/project/module}/templates/spider_xmlfeed.tmpl (82%) create mode 100755 scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py diff --git a/scrapy/trunk/scrapy/bin/scrapy-admin.py b/scrapy/trunk/scrapy/bin/scrapy-admin.py index 5804a0c63..a64228008 100755 --- a/scrapy/trunk/scrapy/bin/scrapy-admin.py +++ b/scrapy/trunk/scrapy/bin/scrapy-admin.py @@ -3,10 +3,11 @@ tasks""" import os import shutil +import string from optparse import OptionParser import scrapy -from scrapy.utils.misc import render_templatefile +from scrapy.utils.misc import render_templatefile, string_camelcase usage = """ scrapy-admin.py [options] [command] @@ -17,14 +18,21 @@ Available commands: Starts a new project with name 'project_name' """ +PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project') + +# This is the list of 
templatefile's path that are rendered *after copying* to +# project directory. TEMPLATES = ( - 'scrapy_settings.py', + 'scrapy-ctl.py', + '${project_name}/settings.py', + '${project_name}/items.py', + '${project_name}/pipelines.py', ) def main(): parser = OptionParser(usage=usage) opts, args = parser.parse_args() - + if not args: parser.print_help() return @@ -33,10 +41,20 @@ def main(): if cmd == "startproject": if len(args) >= 2: project_name = args[1] - project_tplpath = os.path.join(scrapy.__path__[0], "conf", "project_template") - shutil.copytree(project_tplpath, project_name) + project_root_path = project_name + project_module_path = '%s/%s' % (project_name, project_name) + + roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root') + shutil.copytree(roottpl, project_name) + + moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module') + shutil.copytree(moduletpl, '%s/%s' % (project_name, project_name)) + for path in TEMPLATES: - render_templatefile(os.path.join(project_name, path), project_name=project_name) + tplfile = os.path.join(project_root_path, + string.Template(path).substitute(project_name=project_name)) + render_templatefile(tplfile, project_name=project_name, + ProjectName=string_camelcase(project_name)) else: print "scrapy-admin.py: missing project name" else: diff --git a/scrapy/trunk/scrapy/command/commands/genspider.py b/scrapy/trunk/scrapy/command/commands/genspider.py index 249a4a30e..3e954908a 100644 --- a/scrapy/trunk/scrapy/command/commands/genspider.py +++ b/scrapy/trunk/scrapy/command/commands/genspider.py @@ -7,7 +7,8 @@ import shutil from scrapy.spider import spiders from scrapy.command import ScrapyCommand from scrapy.conf import settings -from scrapy.utils.misc import render_templatefile +from scrapy.utils.misc import render_templatefile, string_camelcase + class Command(ScrapyCommand): """ Childs can define custom tvars """ @@ -54,7 +55,8 @@ class Command(ScrapyCommand): def _genspider(self, name, site, template_file): """ Generate 
spider """ tvars = { - 'project_name': settings.get('BOT_NAME'), + 'project_name': settings.get('PROJECT_NAME'), + 'ProjectName': string_camelcase(settings.get('PROJECT_NAME')), 'name': name, 'site': site, 'classname': '%sSpider' % ''.join([s.capitalize() for s in name.split('_')]) diff --git a/scrapy/trunk/scrapy/conf/project_template/scrapy-ctl.py b/scrapy/trunk/scrapy/conf/project_template/scrapy-ctl.py deleted file mode 100755 index e8e56eaf2..000000000 --- a/scrapy/trunk/scrapy/conf/project_template/scrapy-ctl.py +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python - -from scrapy.command.cmdline import execute -execute() diff --git a/scrapy/trunk/scrapy/conf/project_template/__init__.py b/scrapy/trunk/scrapy/templates/project/module/__init__.py similarity index 100% rename from scrapy/trunk/scrapy/conf/project_template/__init__.py rename to scrapy/trunk/scrapy/templates/project/module/__init__.py diff --git a/scrapy/trunk/scrapy/conf/project_template/items.py b/scrapy/trunk/scrapy/templates/project/module/items.py similarity index 70% rename from scrapy/trunk/scrapy/conf/project_template/items.py rename to scrapy/trunk/scrapy/templates/project/module/items.py index d78c4d149..bf93a82e2 100644 --- a/scrapy/trunk/scrapy/conf/project_template/items.py +++ b/scrapy/trunk/scrapy/templates/project/module/items.py @@ -2,5 +2,5 @@ from scrapy.item import ScrapedItem -class MyItem(ScrapedItem): +class ${ProjectName}Item(ScrapedItem): pass diff --git a/scrapy/trunk/scrapy/templates/project/module/pipelines.py b/scrapy/trunk/scrapy/templates/project/module/pipelines.py new file mode 100644 index 000000000..61ceea1fa --- /dev/null +++ b/scrapy/trunk/scrapy/templates/project/module/pipelines.py @@ -0,0 +1,5 @@ +# Define yours item pipelines here + +class ${ProjectName}Pipeline(object): + def process_item(self, domain, item): + return item diff --git a/scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py b/scrapy/trunk/scrapy/templates/project/module/settings.py 
similarity index 96% rename from scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py rename to scrapy/trunk/scrapy/templates/project/module/settings.py index 4b88158f4..dde506c40 100644 --- a/scrapy/trunk/scrapy/conf/project_template/scrapy_settings.py +++ b/scrapy/trunk/scrapy/templates/project/module/settings.py @@ -3,8 +3,9 @@ import $project_name # --------------------------------------------------------------------------- # - Scrapy settings for $project_name - # --------------------------------------------------------------------------- +PROJECT_NAME = '$project_name' -BOT_NAME = '$project_name' +BOT_NAME = PROJECT_NAME BOT_VERSION = '1.0' SPIDER_MODULES = ['$project_name.spiders'] @@ -79,9 +80,9 @@ SPIDER_MIDDLEWARES = ( # Spider side ) -# Item pipelines are usually configured by commands (see conf/commands) -#ITEM_PIPELINES = ( -#) +ITEM_PIPELINES = ( + '${project_name}.pipelines.${ProjectName}Pipeline', +) #DEPTH_LIMIT = 10 # limit the maximum link depth to follow #DEPTH_STATS = 1 # enable depth stats diff --git a/scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py b/scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py similarity index 100% rename from scrapy/trunk/scrapy/conf/project_template/spiders/__init__.py rename to scrapy/trunk/scrapy/templates/project/module/spiders/__init__.py diff --git a/scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl b/scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl similarity index 100% rename from scrapy/trunk/scrapy/conf/project_template/templates/spider_basic.tmpl rename to scrapy/trunk/scrapy/templates/project/module/templates/spider_basic.tmpl diff --git a/scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl b/scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl similarity index 89% rename from scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl rename to 
scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl index a45ffdf42..9f777b231 100644 --- a/scrapy/trunk/scrapy/conf/project_template/templates/spider_crawl.tmpl +++ b/scrapy/trunk/scrapy/templates/project/module/templates/spider_crawl.tmpl @@ -4,7 +4,7 @@ import re from scrapy.xpath import HtmlXPathSelector from scrapy.link.extractors import RegexLinkExtractor from scrapy.contrib.spiders import CrawlSpider, Rule -from $project_name.items import MyItem +from $project_name.items import ${ProjectName}Item class $classname(CrawlSpider): domain_name = '$site' @@ -15,7 +15,7 @@ class $classname(CrawlSpider): ) def parse_item(self, response): - i = MyItem() + i = ${ProjectName}Item() #xs = HtmlXPathSelector(response) #i.attribute('site_id', xs.x('//input[@id="sid"]/@value')) #i.attribute('name', xs.x('//div[@id="name"]')) diff --git a/scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl b/scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl similarity index 87% rename from scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl rename to scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl index bc96a3de9..2d6ec5991 100644 --- a/scrapy/trunk/scrapy/conf/project_template/templates/spider_csvfeed.tmpl +++ b/scrapy/trunk/scrapy/templates/project/module/templates/spider_csvfeed.tmpl @@ -1,6 +1,6 @@ # -*- coding: utf8 -*- from scrapy.contrib.spiders import CSVFeedSpider -from $project_name.items import MyItem +from $project_name.items import ${ProjectName}Item class $classname(CSVFeedSpider): domain_name = '$site' @@ -13,7 +13,7 @@ class $classname(CSVFeedSpider): # return response def parse_row(self, response, row): - i = MyItem() + i = ${ProjectName}Item() #i.attribute('url', row['url']) #i.attribute('name', row['name']) #i.attribute('description', row['description']) diff --git a/scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl 
b/scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl similarity index 82% rename from scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl rename to scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl index 7ea43375e..395204cf5 100644 --- a/scrapy/trunk/scrapy/conf/project_template/templates/spider_xmlfeed.tmpl +++ b/scrapy/trunk/scrapy/templates/project/module/templates/spider_xmlfeed.tmpl @@ -1,13 +1,13 @@ # -*- coding: utf8 -*- from scrapy.contrib.spiders import XMLFeedSpider -from $project_name.items import MyItem +from $project_name.items import ${ProjectName}Item class $classname(XMLFeedSpider): domain_name = '$site' start_urls = ['http://www.$site/feed.xml'] def parse_item(self, response, xSel): - i = MyItem() + i = ${ProjectName}Item() #i.attribute('url', xSel('url')) #i.attribute('name', xSel('name')) #i.attribute('description', xSel('description')) diff --git a/scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py b/scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py new file mode 100755 index 000000000..2d2ed44e9 --- /dev/null +++ b/scrapy/trunk/scrapy/templates/project/root/scrapy-ctl.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import os +os.environ.setdefault('SCRAPYSETTINGS_MODULE', '${project_name}.settings') + +from scrapy.command.cmdline import execute +execute() diff --git a/scrapy/trunk/scrapy/utils/misc.py b/scrapy/trunk/scrapy/utils/misc.py index ea926cdde..c6352fc3e 100644 --- a/scrapy/trunk/scrapy/utils/misc.py +++ b/scrapy/trunk/scrapy/utils/misc.py @@ -153,3 +153,8 @@ def items_to_csv(file, items, delimiter=';', headers=None): value = unicode_to_str(value) if isinstance(value, basestring) else value row.append(value) csv_file.writerow(row) + + +CAMELCASE_INVALID_CHARS = re.compile('[^a-zA-Z]') +def string_camelcase(string): + return CAMELCASE_INVALID_CHARS.sub('', string.title())