1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-03-15 02:10:36 +00:00
2020-06-09 21:30:19 +02:00

143 lines
4.5 KiB
Python

import re
import os
import stat
import string
from importlib import import_module
from os.path import join, exists, abspath
from shutil import ignore_patterns, move, copy2, copystat
import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.exceptions import UsageError
# Template files that are rendered after the project skeleton is copied.
# Each entry is a tuple of path components relative to the project directory;
# the literal ``${project_name}`` component is a ``string.Template``
# placeholder that is substituted with the real project name before joining.
TEMPLATES_TO_RENDER = (
    ('scrapy.cfg',),
    ('${project_name}', 'settings.py.tmpl'),
    ('${project_name}', 'items.py.tmpl'),
    ('${project_name}', 'pipelines.py.tmpl'),
    ('${project_name}', 'middlewares.py.tmpl'),
)

# ``shutil``-style ignore callable: names matching these glob patterns are
# skipped when the template tree is copied.
IGNORE = ignore_patterns(
    '*.pyc',
    '.svn',
)
class Command(ScrapyCommand):
    """Command that creates a new Scrapy project from the project templates.

    The template tree found at :attr:`templates_dir` is copied into the
    target directory, its ``module`` directory is renamed to the project
    name, and the files listed in ``TEMPLATES_TO_RENDER`` are rendered.
    """

    # Class-level configuration consumed by the ScrapyCommand machinery
    # (defined outside this file).
    requires_project = False
    default_settings = {'LOG_ENABLED': False,
                        'SPIDER_LOADER_WARN_ONLY': True}

    def syntax(self):
        """Return the command-line argument synopsis."""
        return "<project_name> [project_dir]"

    def short_desc(self):
        """Return the one-line description of the command."""
        return "Create new project"

    def _is_valid_name(self, project_name):
        """Check that ``project_name`` is usable as a new package name.

        A valid name is a Python identifier-like string (leading letter or
        underscore, followed by word characters) that does not clash with a
        module that can already be found on the import path. An explanatory
        message is printed for every rejected name.

        :param project_name: candidate project (and package) name
        :return: ``True`` if the name is acceptable, ``False`` otherwise
        """
        def _module_exists(module_name):
            """Tell whether ``module_name`` can be located, without running it."""
            # Looking up the spec instead of importing avoids executing the
            # top-level code of an unrelated module just to test for a clash.
            from importlib.util import find_spec
            try:
                spec = find_spec(module_name)
            except ValueError:
                # Raised when the module is already loaded but carries no
                # ``__spec__`` (e.g. ``__main__``): it definitely exists.
                return True
            except ImportError:
                return False
            return spec is not None

        # fullmatch() rather than search() with ``$``: ``$`` also matches just
        # before a trailing newline, which would let "name\n" through.
        if not re.fullmatch(r'[_a-zA-Z]\w*', project_name):
            print('Error: Project names must begin with a letter and contain'
                  ' only\nletters, numbers and underscores')
            return False
        if _module_exists(project_name):
            print('Error: Module %r already exists' % project_name)
            return False
        return True

    def _copytree(self, src, dst):
        """Recursively copy the ``src`` tree into ``dst``.

        Since the original function (``shutil.copytree``) always creates the
        directory, to resolve the issue a new function had to be created.
        It's a simple copy and was reduced for this case: ``dst`` may already
        exist, and names matched by ``IGNORE`` are skipped.

        More info at:
        https://github.com/scrapy/scrapy/pull/2005

        :param src: directory to copy from
        :param dst: directory to copy into (created if missing)
        """
        names = os.listdir(src)
        ignored_names = IGNORE(src, names)

        os.makedirs(dst, exist_ok=True)

        for name in names:
            if name in ignored_names:
                continue

            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            if os.path.isdir(srcname):
                self._copytree(srcname, dstname)
            else:
                copy2(srcname, dstname)

        copystat(src, dst)
        # copystat()/copy2() carry over the template's permission bits, which
        # may be read-only; make the copied tree writable again.
        self._set_rw_permissions(dst)

    def _set_rw_permissions(self, path):
        """Reset the permissions of the directory tree rooted at ``path``.

        Sets permissions of a directory tree to +rw and +rwx for folders.
        This is necessary if the start template files come without write
        permissions. The modes are assigned with ``os.chmod`` (not OR-ed into
        the existing ones): files get owner read/write plus group/other read,
        directories additionally get the execute bit for everyone.

        :param path: root directory of the tree to update
        """
        mode_rw = (stat.S_IRUSR
                   | stat.S_IWUSR
                   | stat.S_IRGRP
                   | stat.S_IROTH)
        mode_x = (stat.S_IXUSR
                  | stat.S_IXGRP
                  | stat.S_IXOTH)
        mode_dir = mode_rw | mode_x

        os.chmod(path, mode_dir)
        for root, dirnames, filenames in os.walk(path):
            for dirname in dirnames:
                os.chmod(join(root, dirname), mode_dir)
            for filename in filenames:
                os.chmod(join(root, filename), mode_rw)

    def run(self, args, opts):
        """Create the project described by ``args``.

        :param args: ``[project_name]`` or ``[project_name, project_dir]``;
            when ``project_dir`` is omitted the project name is used
        :param opts: parsed command options (not used here)
        :raises UsageError: if ``args`` does not hold one or two items

        On a rejected request (existing ``scrapy.cfg``, invalid or clashing
        name, existing package directory) a message is printed,
        ``self.exitcode`` is set to 1 and nothing is copied.
        """
        if len(args) not in (1, 2):
            raise UsageError()

        project_name = args[0]
        project_dir = args[1] if len(args) == 2 else project_name

        if exists(join(project_dir, 'scrapy.cfg')):
            self.exitcode = 1
            print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
            return

        if not self._is_valid_name(project_name):
            self.exitcode = 1
            return

        module_dir = join(project_dir, project_name)
        # The template's ``module`` directory is renamed to ``module_dir``
        # below. If that path already exists, shutil.move() would either nest
        # ``module`` inside it or fail, leaving a half-created project, so
        # refuse before touching the filesystem.
        if exists(module_dir):
            self.exitcode = 1
            print('Error: %s already exists' % abspath(module_dir))
            return

        self._copytree(self.templates_dir, abspath(project_dir))
        move(join(project_dir, 'module'), module_dir)
        for paths in TEMPLATES_TO_RENDER:
            path = join(*paths)
            # Resolve the ``${project_name}`` placeholder in the template path.
            tplfile = join(project_dir,
                           string.Template(path).substitute(project_name=project_name))
            render_templatefile(tplfile, project_name=project_name,
                                ProjectName=string_camelcase(project_name))
        print("New Scrapy project '%s', using template directory '%s', "
              "created in:" % (project_name, self.templates_dir))
        print(" %s\n" % abspath(project_dir))
        print("You can start your first spider with:")
        print(" cd %s" % project_dir)
        print(" scrapy genspider example example.com")

    @property
    def templates_dir(self):
        """Path of the ``project`` template directory.

        Uses the ``TEMPLATES_DIR`` setting when it is set to a truthy value,
        otherwise the ``templates`` directory shipped inside the installed
        ``scrapy`` package.
        """
        _templates_base_dir = (self.settings['TEMPLATES_DIR']
                               or join(scrapy.__path__[0], 'templates'))
        return join(_templates_base_dir, 'project')