Mirror of https://github.com/scrapy/scrapy.git

commit 8a074c9cb5
parent 39540b188a

    removed the scrapy-admin.py command, leaving scrapy-ctl.py as the only scrapy command
bin/scrapy-admin.py (deleted)
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-"""Scrapy admin script is used to create new scrapy projects and similar
-tasks"""
-
-import os
-import string
-from optparse import OptionParser
-import re
-
-import scrapy
-from scrapy.utils.template import render_templatefile, string_camelcase
-from scrapy.utils.python import ignore_patterns, copytree
-
-usage = """
-scrapy-admin.py [options] [command]
-
-Available commands:
-
-  startproject <project_name>
-    Starts a new project with name 'project_name'
-"""
-
-PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
-
-# This is the list of templatefile's path that are rendered *after copying* to
-# project directory.
-TEMPLATES = (
-    'scrapy-ctl.py',
-    '${project_name}/settings.py.tmpl',
-    '${project_name}/items.py.tmpl',
-    '${project_name}/pipelines.py.tmpl',
-)
-
-IGNORE = ignore_patterns('*.pyc', '.svn')
-
-def main():
-    parser = OptionParser(usage=usage)
-    opts, args = parser.parse_args()
-
-    if not args:
-        parser.print_help()
-        return
-
-    cmd = args[0]
-    if cmd == "startproject":
-        if len(args) >= 2:
-            project_name = args[1]
-            if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
-                # Provide a smart error message, depending on the error.
-                if not re.search(r'^[_a-zA-Z]', project_name):
-                    message = 'make sure the project_name begins with a letter or underscore'
-                else:
-                    message = 'use only numbers, letters and underscores'
-                print "scrapy-admin.py: %r is not a valid project name. Please %s." % (project_name, message)
-            else:
-                project_root_path = project_name
-
-                roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
-                copytree(roottpl, project_name, ignore=IGNORE)
-
-                moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
-                copytree(moduletpl, '%s/%s' % (project_name, project_name),
-                    ignore=IGNORE)
-
-                for path in TEMPLATES:
-                    tplfile = os.path.join(project_root_path,
-                        string.Template(path).substitute(project_name=project_name))
-                    render_templatefile(tplfile, project_name=project_name,
-                        ProjectName=string_camelcase(project_name))
-        else:
-            print "scrapy-admin.py: missing project name"
-    else:
-        print "scrapy-admin.py: unknown command: %s" % cmd
-
-if __name__ == '__main__':
-    main()
bin/scrapy-ctl.py (new executable file, 4 lines)
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+from scrapy.command.cmdline import execute
+execute()
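For orientation: the whole script defers to ``scrapy.command.cmdline.execute()``, which reads the command line and dispatches to a command module. A minimal sketch of driving the same entry point with an explicit argument list (the ``argv`` parameter is only added to ``execute()`` further down in this commit, and the project name here is just an example)::

    # Sketch: invoking the new entry point programmatically instead of
    # relying on sys.argv (argv support is added in cmdline.py below).
    from scrapy.command.cmdline import execute

    execute(['scrapy-ctl.py', 'startproject', 'mybot'])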
@@ -4,35 +4,14 @@
 Management scripts
 ==================
 
-Scrapy is controlled by two commmandline scripts:
-
-1. :ref:`topics-scripts-scrapy-admin`: used to create Scrapy projects.
-2. :ref:`topics-scripts-scrapy-ctl`: located in every project's root dir, used
-   to manage each project.
-
-.. _topics-scripts-scrapy-admin:
-
-scrapy-admin.py
-===============
-Usage: ``scrapy-admin.py <subcommand>``
-
-This script should be in your system path.
-
-Available subcommands
----------------------
-
-startproject
-~~~~~~~~~~~~
-Usage: ``startproject <project_name>``
-
-Starts a new project with name ``project_name``
-
+Scrapy is controlled by the ``scrapy-ctl.py`` command.
 
 .. _topics-scripts-scrapy-ctl:
 
 scrapy-ctl.py
 =============
-Usage: ``scrapy-admin.py <subcommand>``
+Usage: ``scrapy-ctl.py <command>``
 
 This script is located in every project's root folder.
@@ -154,3 +133,9 @@ start
 ~~~~~
 Start the Scrapy manager but don't run any spider (idle mode)
 
+startproject
+~~~~~~~~~~~~
+Usage: ``startproject <project_name>``
+
+Starts a new project with name ``project_name``
+
@@ -210,20 +210,20 @@ Panel`_)::
 
     set PYTHONPATH=C:\path\to\scrapy-trunk
 
-Make the scrapy-admin.py script available
------------------------------------------
+Make the scrapy-ctl.py script available
+---------------------------------------
 
 On Unix-like systems, create a symbolic link to the file
-``scrapy-trunk/scrapy/bin/scrapy-admin.py`` in a directory on your system path,
+``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` in a directory on your system path,
 such as ``/usr/local/bin``. For example::
 
-    ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-admin.py /usr/local/bin
+    ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-ctl.py /usr/local/bin
 
-This simply lets you type scrapy-admin.py from within any directory, rather
+This simply lets you type ``scrapy-ctl.py`` from within any directory, rather
 than having to qualify the command with the full path to the file.
 
 On Windows systems, the same result can be achieved by copying the file
-``scrapy-trunk/scrapy/bin/scrapy-admin.py`` to somewhere on your system path,
+``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` to somewhere on your system path,
 for example ``C:\Python25\Scripts``, which is customary for Python scripts.
 
 .. _Control Panel: http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/sysdm_advancd_environmnt_addchange_variable.mspx
@@ -36,7 +36,7 @@ Creating a project
 Before start scraping, you will have set up a new Scrapy project. Enter a
 directory where you'd like to store your code and then run::
 
-    scrapy-admin.py startproject dmoz
+    python scrapy-ctl.py startproject dmoz
 
 This will create a ``dmoz`` directory with the following contents::
 
@@ -704,7 +704,7 @@ PROJECT_NAME
 Default: ``Not Defined``
 
 The name of the current project. It matches the project module name as created
-by :ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command,
+by :ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl-startproject>` command,
 and is only defined by project settings file.
 
 .. setting:: REDIRECT_MAX_TIMES
@@ -953,7 +953,7 @@ TEMPLATES_DIR
 Default: ``templates`` dir inside scrapy module
 
 The directory where to look for template when creating new projects with
-:ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command.
+:ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl>` command.
 
 .. setting:: URLLENGTH_LIMIT
 
@@ -9,7 +9,7 @@ import scrapy
 from scrapy import log
 from scrapy.spider import spiders
 from scrapy.xlib import lsprofcalltree
-from scrapy.conf import settings, SETTINGS_MODULE
+from scrapy.conf import settings
 
 # This dict holds information about the executed command for later use
 command_executed = {}
@@ -45,12 +45,12 @@ def get_command_name(argv):
         if not arg.startswith('-'):
             return arg
 
-def usage(prog):
+def usage():
     s = "Usage\n"
     s += "=====\n"
-    s += "%s <command> [options] [args]\n" % prog
+    s += "scrapy-ctl.py <command> [options] [args]\n"
     s += "  Run a command\n\n"
-    s += "%s <command> -h\n" % prog
+    s += "scrapy-ctl.py <command> -h\n"
     s += "  Print command help and options\n\n"
     s += "Available commands\n"
    s += "===================\n"
@@ -72,14 +72,10 @@ def update_default_settings(module, cmdname):
         if not k.startswith("_"):
             settings.defaults[k] = v
 
-def execute():
-    if not settings.settings_module:
-        print "Scrapy %s\n" % scrapy.__version__
-        print "Error: Cannot find %r module in python path" % SETTINGS_MODULE
-        sys.exit(1)
-    execute_with_args(sys.argv)
-
-def execute_with_args(argv):
+def execute(argv=None):
+    if argv is None:
+        argv = sys.argv
+
     cmds = get_commands_dict()
 
     cmdname = get_command_name(argv)
@@ -88,7 +84,7 @@ def execute_with_args(argv):
 
     if not cmdname:
         print "Scrapy %s\n" % scrapy.__version__
-        print usage(argv[0])
+        print usage()
         sys.exit(2)
 
     parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
@@ -119,6 +115,11 @@ def execute_with_args(argv):
     command_executed['opts'] = opts.__dict__.copy()
 
     cmd.process_options(args, opts)
+    if cmd.requires_project and not settings.settings_module:
+        print "Error running: scrapy-ctl.py %s\n" % cmdname
+        print "Cannot find project settings module in python path: %s" % \
+            settings.settings_module_path
+        sys.exit(1)
     spiders.load()
     log.start()
     ret = run_command(cmd, args, opts)
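The effect of the new ``requires_project`` gate, reduced to a standalone sketch (simplified stand-ins, not the actual ``cmdline.py`` code)::

    # Simplified model of the check added above: a command that needs a
    # project refuses to run when the settings module could not be imported.
    class FakeSettings(object):
        settings_module = None                    # import failed
        settings_module_path = 'scrapy_settings'  # what we tried to import

    def check_project(requires_project, settings):
        if requires_project and not settings.settings_module:
            raise SystemExit("Cannot find project settings module in "
                             "python path: %s" % settings.settings_module_path)

    check_project(False, FakeSettings())   # fetch/shell/startproject: allowed
    # check_project(True, FakeSettings()) # crawl/list/parse: would exit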
@@ -8,6 +8,9 @@ from scrapy import log
 
 class Command(ScrapyCommand):
 
+    requires_project = True
+
     def syntax(self):
         return "[options] <domain|url> ..."
 
@@ -4,6 +4,9 @@ from scrapy.command import ScrapyCommand
 from scrapy.fetcher import fetch
 
 class Command(ScrapyCommand):
 
+    requires_project = False
+
     def syntax(self):
         return "[options] <url>"
 
@@ -24,6 +24,8 @@ def sanitize_module_name(module_name):
 
 class Command(ScrapyCommand):
 
+    requires_project = True
+
     def syntax(self):
         return "[options] <spider_module_name> <spider_domain_name>"
@@ -1,6 +1,9 @@
 from scrapy.command import ScrapyCommand, cmdline
 
 class Command(ScrapyCommand):
 
+    requires_project = False
+
     def syntax(self):
         return "<command>"
 
@@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
 from scrapy.spider import spiders
 
 class Command(ScrapyCommand):
 
+    requires_project = True
+
     def short_desc(self):
         return "List available spiders"
 
@@ -7,6 +7,9 @@ from scrapy.utils import display
 from scrapy import log
 
 class Command(ScrapyCommand):
 
+    requires_project = True
+
     def syntax(self):
         return "[options] <url>"
 
@@ -21,6 +21,9 @@ from scrapy.http import Request
 from scrapy.fetcher import get_or_create_spider
 
 class Command(ScrapyCommand):
 
+    requires_project = False
+
     def syntax(self):
         return "[url]"
 
@@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
 from scrapy.core.manager import scrapymanager
 
 class Command(ScrapyCommand):
 
+    requires_project = True
+
     def short_desc(self):
         return "Start the Scrapy manager but don't run any spider (idle mode)"
 
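Each built-in command now declares whether it needs a project, as the hunks above show. A hypothetical third-party command would opt in the same way (sketch only; the class body follows the ``ScrapyCommand`` interface used throughout this diff)::

    from scrapy.command import ScrapyCommand

    class Command(ScrapyCommand):

        requires_project = True   # cmdline.execute() refuses to run it outside a project

        def syntax(self):
            return "[options]"

        def short_desc(self):
            return "Example command that needs the project settings"

        def run(self, args, opts):
            pass  # real work would go here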
scrapy/command/commands/startproject.py (new file, 62 lines)
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+import os
+import string
+import re
+
+import scrapy
+from scrapy.command import ScrapyCommand
+from scrapy.utils.template import render_templatefile, string_camelcase
+from scrapy.utils.python import ignore_patterns, copytree
+
+PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
+
+# This is the list of templatefile's path that are rendered *after copying* to
+# the new project directory.
+TEMPLATES = (
+    ('scrapy-ctl.py',),
+    ('${project_name}', 'settings.py.tmpl'),
+    ('${project_name}', 'items.py.tmpl'),
+    ('${project_name}', 'pipelines.py.tmpl'),
+)
+
+IGNORE = ignore_patterns('*.pyc', '.svn')
+
+class Command(ScrapyCommand):
+
+    requires_project = False
+
+    def syntax(self):
+        return "<project_name>"
+
+    def short_desc(self):
+        return "Create new project with an initial project template"
+
+    def run(self, args, opts):
+        if len(args) != 1:
+            return False
+
+        project_name = args[0]
+        if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
+            # Provide a smart error message, depending on the error.
+            if not re.search(r'^[a-zA-Z]', project_name):
+                message = 'Project names must begin with a letter'
+            else:
+                message = 'Project names must contain only letters, numbers and underscores'
+            print "Invalid project name: %s\n\n%s" % (project_name, message)
+        else:
+            project_root_path = project_name
+
+            roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
+            copytree(roottpl, project_name, ignore=IGNORE)
+
+            moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
+            copytree(moduletpl, '%s/%s' % (project_name, project_name),
+                ignore=IGNORE)
+
+            for paths in TEMPLATES:
+                path = os.path.join(*paths)
+                tplfile = os.path.join(project_root_path,
+                    string.Template(path).substitute(project_name=project_name))
+                render_templatefile(tplfile, project_name=project_name,
+                    ProjectName=string_camelcase(project_name))
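Note the change of ``TEMPLATES`` entries from strings to tuples of path segments: they are joined with ``os.path.join`` for portability before the ``${project_name}`` placeholder is expanded. A standalone sketch of that rendering step (the project name ``mybot`` is just an example)::

    import os
    import string

    TEMPLATES = (
        ('scrapy-ctl.py',),
        ('${project_name}', 'settings.py.tmpl'),
    )

    for paths in TEMPLATES:
        path = os.path.join(*paths)  # join segments portably
        print string.Template(path).substitute(project_name='mybot')
    # -> scrapy-ctl.py
    # -> mybot/settings.py.tmpl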
@@ -11,6 +11,9 @@ from optparse import OptionGroup
 from scrapy.conf import settings
 
 class ScrapyCommand(object):
 
+    requires_project = False
+
     def syntax(self):
         """
         Command syntax (preferably one-line). Do not include command name.
@@ -61,12 +64,17 @@ class ScrapyCommand(object):
             help="write lsprof profiling stats to FILE")
         group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
             help="write process ID to FILE")
-        group.add_option("--set", dest="settings", action="append", \
+        group.add_option("--set", dest="set", action="append", \
             metavar="SETTING=VALUE", default=[], \
             help="set/override setting (may be repeated)")
+        group.add_option("--settings", dest="settings", metavar="MODULE",
+            help="python path to the Scrapy project settings")
         parser.add_option_group(group)
 
     def process_options(self, args, opts):
+        if opts.settings:
+            settings.set_settings_module(opts.settings)
+
         if opts.logfile:
             settings.overrides['LOG_ENABLED'] = True
             settings.overrides['LOG_FILE'] = opts.logfile
@@ -90,7 +98,7 @@ class ScrapyCommand(object):
             with open(opts.pidfile, "w") as f:
                 f.write(str(os.getpid()))
 
-        for setting in opts.settings:
+        for setting in opts.set:
             if '=' in setting:
                 name, val = setting.split('=', 1)
                 settings.overrides[name] = val
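The rename of the ``--set`` option's ``dest`` frees ``opts.settings`` for the new ``--settings`` flag, which is why the loop above now reads ``opts.set``. A self-contained optparse sketch of the two options as declared above::

    import optparse

    parser = optparse.OptionParser()
    parser.add_option("--set", dest="set", action="append",
        metavar="SETTING=VALUE", default=[],
        help="set/override setting (may be repeated)")
    parser.add_option("--settings", dest="settings", metavar="MODULE",
        help="python path to the Scrapy project settings")

    opts, args = parser.parse_args(
        ["--set", "LOG_FILE=scrapy.log", "--settings", "mybot.settings"])
    print opts.set        # ['LOG_FILE=scrapy.log']
    print opts.settings   # 'mybot.settings'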
@@ -7,7 +7,6 @@ See documentation in docs/topics/settings.rst
 import os
 import cPickle as pickle
 
-SETTINGS_MODULE = os.environ.get('SCRAPYSETTINGS_MODULE', 'scrapy_settings')
 SETTINGS_DISABLED = os.environ.get('SCRAPY_SETTINGS_DISABLED', False)
 
 class Settings(object):
@@ -19,17 +18,11 @@ class Settings(object):
     global_defaults = None
 
     def __init__(self):
+        self.set_settings_module()
         pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
         self.overrides = pickle.loads(pickled_settings) if pickled_settings else {}
-        self.settings_module = self._import(SETTINGS_MODULE)
         self.defaults = {}
-        self.global_defaults = self._import('scrapy.conf.default_settings')
-
-    def _import(self, modulepath):
-        try:
-            return __import__(modulepath, {}, {}, [''])
-        except ImportError:
-            pass
+        self.global_defaults = __import__('scrapy.conf.default_settings', {}, {}, [''])
 
     def __getitem__(self, opt_name):
         if not SETTINGS_DISABLED:
@@ -43,6 +36,16 @@ class Settings(object):
             return self.defaults[opt_name]
         return getattr(self.global_defaults, opt_name, None)
 
+    def set_settings_module(self, settings_module_path=None):
+        if settings_module_path is None:
+            settings_module_path = os.environ.get('SCRAPYSETTINGS_MODULE', \
+                'scrapy_settings')
+        self.settings_module_path = settings_module_path
+        try:
+            self.settings_module = __import__(settings_module_path, {}, {}, [''])
+        except ImportError:
+            self.settings_module = None
+
     def get(self, name, default=None):
         return self[name] if self[name] is not None else default
 
@@ -68,4 +71,7 @@ class Settings(object):
         else:
             return str(value).split(',')
 
+    def __str__(self):
+        return "<Settings %r>" % self.settings_module_path
+
 settings = Settings()
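Taken together, the ``Settings`` changes make the settings module selectable at runtime rather than fixed at import time: the path defaults to the ``SCRAPYSETTINGS_MODULE`` environment variable, and a failed import now leaves ``settings_module`` as ``None`` for ``cmdline.py`` to report per command instead of aborting up front. A usage sketch (``mybot.settings`` is a hypothetical module path)::

    from scrapy.conf import settings

    settings.set_settings_module('mybot.settings')  # e.g. from the --settings flag
    if settings.settings_module is None:
        print "not importable: %s" % settings.settings_module_path
    print settings  # via the __str__ added above: <Settings 'mybot.settings'>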
scrapy/conf/commands/startproject.py (new file, 1 line)
@@ -0,0 +1 @@
+LOG_ENABLED = False
@@ -19,8 +19,8 @@ class SpiderManager(object):
     def __init__(self):
         self.loaded = False
         self.default_domain = None
-        self.spider_modules = settings.getlist('SPIDER_MODULES')
         self.force_domain = None
+        self.spider_modules = None
 
     def fromdomain(self, domain_name):
         return self.asdict().get(domain_name)
@@ -56,7 +56,10 @@ class SpiderManager(object):
         if not self.loaded:
             self.load()
 
-    def load(self):
+    def load(self, spider_modules=None):
+        if spider_modules is None:
+            spider_modules = settings.getlist('SPIDER_MODULES')
+        self.spider_modules = spider_modules
         self._invaliddict = {}
         self._spiders = {}
 
@@ -75,7 +78,7 @@ class SpiderManager(object):
             # we can't use the log module here because it may not be available yet
             print "WARNING: Could not load spider %s: %s" % (spider, e)
 
-    def reload(self, skip_domains=None):
+    def reload(self, spider_modules=None, skip_domains=None):
         """Reload spiders by trying to discover any spiders added under the
         spiders module/packages, removes any spiders removed.
 
@@ -91,7 +94,7 @@ class SpiderManager(object):
             if not domain in skip_domains:
                 reload(sys.modules[spider.__module__])
                 reloaded += 1
-        self.load() # second call to update spider instances
+        self.load(spider_modules=spider_modules) # second call to update spider instances
         log.msg("Reloaded %d/%d scrapy spiders" % (reloaded, len(pdict)), level=log.DEBUG)
 
     def _getspiders(self, interface, package):
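With ``load()`` and ``reload()`` accepting an explicit module list, callers such as the test suite no longer mutate ``spider_modules`` and then call ``reload()``. A sketch of both call styles::

    from scrapy.spider import spiders

    spiders.load(['scrapy.tests.test_spiders'])  # explicit modules, as the tests now do
    spiders.load()  # falls back to settings.getlist('SPIDER_MODULES')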
@@ -2,17 +2,15 @@ from __future__ import with_statement
 
 from unittest import TestCase
 
-from scrapy.spider import spiders
 from scrapy.http import Response, Request
+from scrapy.spider import BaseSpider
 from scrapy.contrib.downloadermiddleware.cookies import CookiesMiddleware
 
 
 class CookiesMiddlewareTest(TestCase):
 
     def setUp(self):
-        spiders.spider_modules = ['scrapy.tests.test_spiders']
-        spiders.reload()
-        self.spider = spiders.fromdomain('scrapytest.org')
+        self.spider = BaseSpider()
         self.mw = CookiesMiddleware()
 
     def tearDown(self):
@@ -3,7 +3,7 @@ from __future__ import with_statement
 from unittest import TestCase
 from os.path import join, abspath, dirname
 
-from scrapy.spider import spiders
+from scrapy.spider import BaseSpider
 from scrapy.http import Response, Request
 from scrapy.contrib.downloadermiddleware.httpcompression import HttpCompressionMiddleware
 from scrapy.tests import tests_datadir
@@ -20,9 +20,7 @@ FORMAT = {
 class HttpCompressionTest(TestCase):
 
     def setUp(self):
-        spiders.spider_modules = ['scrapy.tests.test_spiders']
-        spiders.reload()
-        self.spider = spiders.fromdomain('scrapytest.org')
+        self.spider = BaseSpider()
         self.mw = HttpCompressionMiddleware()
 
     def _getresponse(self, coding):
@@ -1,16 +1,14 @@
 import unittest
 
 from scrapy.contrib.downloadermiddleware.redirect import RedirectMiddleware
-from scrapy.spider import spiders
+from scrapy.spider import BaseSpider
 from scrapy.core.exceptions import IgnoreRequest
 from scrapy.http import Request, Response, Headers
 
 class RedirectMiddlewareTest(unittest.TestCase):
 
     def setUp(self):
-        spiders.spider_modules = ['scrapy.tests.test_spiders']
-        spiders.reload()
-        self.spider = spiders.fromdomain('scrapytest.org')
+        self.spider = BaseSpider()
         self.mw = RedirectMiddleware()
 
     def test_priority_adjust(self):
@@ -5,14 +5,12 @@ from twisted.internet.error import TimeoutError as ServerTimeoutError, DNSLookup
     ConnectionLost
 
 from scrapy.contrib.downloadermiddleware.retry import RetryMiddleware
-from scrapy.spider import spiders
+from scrapy.spider import BaseSpider
 from scrapy.http import Request, Response
 
 class RetryTest(unittest.TestCase):
     def setUp(self):
-        spiders.spider_modules = ['scrapy.tests.test_spiders']
-        spiders.reload()
-        self.spider = spiders.fromdomain('scrapytest.org')
+        self.spider = BaseSpider()
         self.mw = RetryMiddleware()
         self.mw.max_retry_times = 2
 
@@ -1,6 +1,6 @@
 from unittest import TestCase
 
-from scrapy.spider import spiders
+from scrapy.spider import BaseSpider
 from scrapy.http import Request
 from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
 from scrapy.conf import settings
|
|||||||
class UserAgentMiddlewareTest(TestCase):
|
class UserAgentMiddlewareTest(TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
spiders.spider_modules = ['scrapy.tests.test_spiders']
|
self.spider = BaseSpider()
|
||||||
spiders.reload()
|
|
||||||
self.spider = spiders.fromdomain('scrapytest.org')
|
|
||||||
self.mw = UserAgentMiddleware()
|
self.mw = UserAgentMiddleware()
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
|
@@ -45,8 +45,7 @@ class CrawlingSession(object):
         self.portno = self.port.getHost().port
 
         from scrapy.spider import spiders
-        spiders.spider_modules = ['scrapy.tests.test_spiders']
-        spiders.reload()
+        spiders.load(['scrapy.tests.test_spiders'])
 
         self.spider = spiders.fromdomain(self.domain)
         if self.spider: