Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-23 06:24:08 +00:00)

Commit 8a074c9cb5 (parent 39540b188a)

Removed the scrapy-admin.py command, leaving scrapy-ctl.py as the only Scrapy command.
scrapy-admin.py (deleted file)
@@ -1,75 +0,0 @@
#!/usr/bin/env python
"""Scrapy admin script is used to create new scrapy projects and similar
tasks"""
import os
import string
from optparse import OptionParser
import re

import scrapy
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.utils.python import ignore_patterns, copytree

usage = """
scrapy-admin.py [options] [command]

Available commands:

  startproject <project_name>
    Starts a new project with name 'project_name'
"""

PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')

# This is the list of templatefile's path that are rendered *after copying* to
# project directory.
TEMPLATES = (
    'scrapy-ctl.py',
    '${project_name}/settings.py.tmpl',
    '${project_name}/items.py.tmpl',
    '${project_name}/pipelines.py.tmpl',
)

IGNORE = ignore_patterns('*.pyc', '.svn')

def main():
    parser = OptionParser(usage=usage)
    opts, args = parser.parse_args()

    if not args:
        parser.print_help()
        return

    cmd = args[0]
    if cmd == "startproject":
        if len(args) >= 2:
            project_name = args[1]
            if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
                # Provide a smart error message, depending on the error.
                if not re.search(r'^[_a-zA-Z]', project_name):
                    message = 'make sure the project_name begins with a letter or underscore'
                else:
                    message = 'use only numbers, letters and underscores'
                print "scrapy-admin.py: %r is not a valid project name. Please %s." % (project_name, message)
            else:
                project_root_path = project_name

                roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
                copytree(roottpl, project_name, ignore=IGNORE)

                moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
                copytree(moduletpl, '%s/%s' % (project_name, project_name),
                    ignore=IGNORE)

                for path in TEMPLATES:
                    tplfile = os.path.join(project_root_path,
                        string.Template(path).substitute(project_name=project_name))
                    render_templatefile(tplfile, project_name=project_name,
                        ProjectName=string_camelcase(project_name))
        else:
            print "scrapy-admin.py: missing project name"
    else:
        print "scrapy-admin.py: unknown command: %s" % cmd

if __name__ == '__main__':
    main()
bin/scrapy-ctl.py (new executable file, 4 lines)
@@ -0,0 +1,4 @@
#!/usr/bin/env python

from scrapy.command.cmdline import execute
execute()
@@ -4,35 +4,14 @@
Management scripts
==================

Scrapy is controlled by two commmandline scripts:

1. :ref:`topics-scripts-scrapy-admin`: used to create Scrapy projects.
2. :ref:`topics-scripts-scrapy-ctl`: located in every project's root dir, used
   to manage each project.

.. _topics-scripts-scrapy-admin:

scrapy-admin.py
===============
Usage: ``scrapy-admin.py <subcommand>``

This script should be in your system path.

Available subcommands
---------------------

startproject
~~~~~~~~~~~~
Usage: ``startproject <project_name>``

Starts a new project with name ``project_name``

Scrapy is controlled by the ``scrapy-ctl.py`` command.

.. _topics-scripts-scrapy-ctl:

scrapy-ctl.py
=============
Usage: ``scrapy-admin.py <subcommand>``

Usage: ``scrapy-ctl.py <command>``

This script is located in every project's root folder.

@@ -154,3 +133,9 @@ start
~~~~~
Start the Scrapy manager but don't run any spider (idle mode)

startproject
~~~~~~~~~~~~
Usage: ``startproject <project_name>``

Starts a new project with name ``project_name``

@@ -210,20 +210,20 @@ Panel`_)::

    set PYTHONPATH=C:\path\to\scrapy-trunk

Make the scrapy-admin.py script available
-----------------------------------------
Make the scrapy-ctl.py script available
---------------------------------------

On Unix-like systems, create a symbolic link to the file
``scrapy-trunk/scrapy/bin/scrapy-admin.py`` in a directory on your system path,
``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` in a directory on your system path,
such as ``/usr/local/bin``. For example::

    ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-admin.py /usr/local/bin
    ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-ctl.py /usr/local/bin

This simply lets you type scrapy-admin.py from within any directory, rather
This simply lets you type ``scrapy-ctl.py`` from within any directory, rather
than having to qualify the command with the full path to the file.

On Windows systems, the same result can be achieved by copying the file
``scrapy-trunk/scrapy/bin/scrapy-admin.py`` to somewhere on your system path,
``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` to somewhere on your system path,
for example ``C:\Python25\Scripts``, which is customary for Python scripts.

.. _Control Panel: http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/sysdm_advancd_environmnt_addchange_variable.mspx

@@ -36,7 +36,7 @@ Creating a project
Before start scraping, you will have set up a new Scrapy project. Enter a
directory where you'd like to store your code and then run::

    scrapy-admin.py startproject dmoz
    python scrapy-ctl.py startproject dmoz

This will create a ``dmoz`` directory with the following contents::

@@ -704,7 +704,7 @@ PROJECT_NAME
Default: ``Not Defined``

The name of the current project. It matches the project module name as created
by :ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command,
by :ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl-startproject>` command,
and is only defined by project settings file.

.. setting:: REDIRECT_MAX_TIMES

@@ -953,7 +953,7 @@ TEMPLATES_DIR
Default: ``templates`` dir inside scrapy module

The directory where to look for template when creating new projects with
:ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command.
:ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl>` command.

.. setting:: URLLENGTH_LIMIT

@@ -9,7 +9,7 @@ import scrapy
from scrapy import log
from scrapy.spider import spiders
from scrapy.xlib import lsprofcalltree
from scrapy.conf import settings, SETTINGS_MODULE
from scrapy.conf import settings

# This dict holds information about the executed command for later use
command_executed = {}

@@ -45,12 +45,12 @@ def get_command_name(argv):
        if not arg.startswith('-'):
            return arg

def usage(prog):
def usage():
    s = "Usage\n"
    s += "=====\n"
    s += "%s <command> [options] [args]\n" % prog
    s += "scrapy-ctl.py <command> [options] [args]\n"
    s += " Run a command\n\n"
    s += "%s <command> -h\n" % prog
    s += "scrapy-ctl.py <command> -h\n"
    s += " Print command help and options\n\n"
    s += "Available commands\n"
    s += "===================\n"

@@ -72,14 +72,10 @@ def update_default_settings(module, cmdname):
        if not k.startswith("_"):
            settings.defaults[k] = v

def execute():
    if not settings.settings_module:
        print "Scrapy %s\n" % scrapy.__version__
        print "Error: Cannot find %r module in python path" % SETTINGS_MODULE
        sys.exit(1)
    execute_with_args(sys.argv)
def execute(argv=None):
    if argv is None:
        argv = sys.argv

def execute_with_args(argv):
    cmds = get_commands_dict()

    cmdname = get_command_name(argv)

@@ -88,7 +84,7 @@ def execute_with_args(argv):

    if not cmdname:
        print "Scrapy %s\n" % scrapy.__version__
        print usage(argv[0])
        print usage()
        sys.exit(2)

    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \

@@ -119,6 +115,11 @@ def execute_with_args(argv):
    command_executed['opts'] = opts.__dict__.copy()

    cmd.process_options(args, opts)
    if cmd.requires_project and not settings.settings_module:
        print "Error running: scrapy-ctl.py %s\n" % cmdname
        print "Cannot find project settings module in python path: %s" % \
            settings.settings_module_path
        sys.exit(1)
    spiders.load()
    log.start()
    ret = run_command(cmd, args, opts)

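For context, the refactored entry point now accepts an explicit argument list instead of always reading sys.argv. A minimal sketch of driving it programmatically follows; the command and project name are illustrative assumptions, not part of this commit:

# Hypothetical caller: roughly equivalent to running "scrapy-ctl.py startproject mybot".
# argv[0] is the program name; the first non-option argument selects the command.
from scrapy.command.cmdline import execute

execute(['scrapy-ctl.py', 'startproject', 'mybot'])

Calling execute() with no argument keeps the old behaviour, since it falls back to sys.argv.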
@@ -8,6 +8,9 @@ from scrapy import log


class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <domain|url> ..."

@@ -4,6 +4,9 @@ from scrapy.command import ScrapyCommand
from scrapy.fetcher import fetch

class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[options] <url>"

@@ -24,6 +24,8 @@ def sanitize_module_name(module_name):

class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <spider_module_name> <spider_domain_name>"

@@ -1,6 +1,9 @@
from scrapy.command import ScrapyCommand, cmdline

class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "<command>"

@@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
from scrapy.spider import spiders

class Command(ScrapyCommand):

    requires_project = True

    def short_desc(self):
        return "List available spiders"

@@ -7,6 +7,9 @@ from scrapy.utils import display
from scrapy import log

class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <url>"

@@ -21,6 +21,9 @@ from scrapy.http import Request
from scrapy.fetcher import get_or_create_spider

class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[url]"

@@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
from scrapy.core.manager import scrapymanager

class Command(ScrapyCommand):

    requires_project = True

    def short_desc(self):
        return "Start the Scrapy manager but don't run any spider (idle mode)"

scrapy/command/commands/startproject.py (new file, 62 lines)
@@ -0,0 +1,62 @@
#!/usr/bin/env python

import os
import string
import re

import scrapy
from scrapy.command import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.utils.python import ignore_patterns, copytree

PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')

# This is the list of templatefile's path that are rendered *after copying* to
# the new project directory.
TEMPLATES = (
    ('scrapy-ctl.py',),
    ('${project_name}', 'settings.py.tmpl'),
    ('${project_name}', 'items.py.tmpl'),
    ('${project_name}', 'pipelines.py.tmpl'),
)

IGNORE = ignore_patterns('*.pyc', '.svn')

class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "<project_name>"

    def short_desc(self):
        return "Create new project with an initial project template"

    def run(self, args, opts):
        if len(args) != 1:
            return False

        project_name = args[0]
        if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
            # Provide a smart error message, depending on the error.
            if not re.search(r'^[a-zA-Z]', project_name):
                message = 'Project names must begin with a letter'
            else:
                message = 'Project names must contain only letters, numbers and underscores'
            print "Invalid project name: %s\n\n%s" % (project_name, message)
        else:
            project_root_path = project_name

            roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
            copytree(roottpl, project_name, ignore=IGNORE)

            moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
            copytree(moduletpl, '%s/%s' % (project_name, project_name),
                ignore=IGNORE)

            for paths in TEMPLATES:
                path = os.path.join(*paths)
                tplfile = os.path.join(project_root_path,
                    string.Template(path).substitute(project_name=project_name))
                render_templatefile(tplfile, project_name=project_name,
                    ProjectName=string_camelcase(project_name))
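As an aside, the TEMPLATES entries above are now path tuples rather than plain strings. A small standalone sketch of how one entry is expanded before rendering, for a hypothetical project named "mybot" (the name is an assumption):

import os
import string

# One TEMPLATES entry, joined into a relative path with the platform separator...
paths = ('${project_name}', 'settings.py.tmpl')
path = os.path.join(*paths)          # '${project_name}/settings.py.tmpl' on POSIX
# ...then the placeholder is substituted and the result is rooted at the new
# project directory, yielding the file that render_templatefile() rewrites in place.
tplfile = os.path.join('mybot', string.Template(path).substitute(project_name='mybot'))
print(tplfile)                       # mybot/mybot/settings.py.tmpl (on POSIX)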
@@ -11,6 +11,9 @@ from optparse import OptionGroup
from scrapy.conf import settings

class ScrapyCommand(object):

    requires_project = False

    def syntax(self):
        """
        Command syntax (preferably one-line). Do not include command name.

@@ -61,12 +64,17 @@ class ScrapyCommand(object):
            help="write lsprof profiling stats to FILE")
        group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
            help="write process ID to FILE")
        group.add_option("--set", dest="settings", action="append", \
        group.add_option("--set", dest="set", action="append", \
            metavar="SETTING=VALUE", default=[], \
            help="set/override setting (may be repeated)")
        group.add_option("--settings", dest="settings", metavar="MODULE",
            help="python path to the Scrapy project settings")
        parser.add_option_group(group)

    def process_options(self, args, opts):
        if opts.settings:
            settings.set_settings_module(opts.settings)

        if opts.logfile:
            settings.overrides['LOG_ENABLED'] = True
            settings.overrides['LOG_FILE'] = opts.logfile

@@ -90,7 +98,7 @@ class ScrapyCommand(object):
            with open(opts.pidfile, "w") as f:
                f.write(str(os.getpid()))

        for setting in opts.settings:
        for setting in opts.set:
            if '=' in setting:
                name, val = setting.split('=', 1)
                settings.overrides[name] = val

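To make the option rename concrete, here is a hedged sketch of what process_options() does with the two flags, using illustrative values for --settings and --set (the module name and setting are assumptions):

from scrapy.conf import settings

# Roughly what "scrapy-ctl.py crawl --settings mybot.settings --set DOWNLOAD_DELAY=2"
# hands to process_options(): opts.settings is a module path, opts.set a list of
# NAME=VALUE strings (values here are illustrative).
opts_settings = 'mybot.settings'
opts_set = ['DOWNLOAD_DELAY=2']

if opts_settings:
    settings.set_settings_module(opts_settings)
for setting in opts_set:
    if '=' in setting:
        name, val = setting.split('=', 1)
        settings.overrides[name] = val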
@@ -7,7 +7,6 @@ See documentation in docs/topics/settings.rst
import os
import cPickle as pickle

SETTINGS_MODULE = os.environ.get('SCRAPYSETTINGS_MODULE', 'scrapy_settings')
SETTINGS_DISABLED = os.environ.get('SCRAPY_SETTINGS_DISABLED', False)

class Settings(object):

@@ -19,17 +18,11 @@ class Settings(object):
    global_defaults = None

    def __init__(self):
        self.set_settings_module()
        pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
        self.overrides = pickle.loads(pickled_settings) if pickled_settings else {}
        self.settings_module = self._import(SETTINGS_MODULE)
        self.defaults = {}
        self.global_defaults = self._import('scrapy.conf.default_settings')

    def _import(self, modulepath):
        try:
            return __import__(modulepath, {}, {}, [''])
        except ImportError:
            pass
        self.global_defaults = __import__('scrapy.conf.default_settings', {}, {}, [''])

    def __getitem__(self, opt_name):
        if not SETTINGS_DISABLED:

@@ -43,6 +36,16 @@ class Settings(object):
                return self.defaults[opt_name]
        return getattr(self.global_defaults, opt_name, None)

    def set_settings_module(self, settings_module_path=None):
        if settings_module_path is None:
            settings_module_path = os.environ.get('SCRAPYSETTINGS_MODULE', \
                'scrapy_settings')
        self.settings_module_path = settings_module_path
        try:
            self.settings_module = __import__(settings_module_path, {}, {}, [''])
        except ImportError:
            self.settings_module = None

    def get(self, name, default=None):
        return self[name] if self[name] is not None else default

@@ -68,4 +71,7 @@ class Settings(object):
        else:
            return str(value).split(',')

    def __str__(self):
        return "<Settings %r>" % self.settings_module_path

settings = Settings()

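For illustration, the new set_settings_module() hook lets callers repoint the settings singleton at a given module after startup. A minimal sketch, assuming a module named mybot.settings exists on the Python path:

from scrapy.conf import settings

# Point the global settings object at a project's settings module (name assumed);
# on ImportError the module stays None while settings_module_path records the attempt.
settings.set_settings_module('mybot.settings')
if settings.settings_module is None:
    print("Could not import %r" % settings.settings_module_path)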
scrapy/conf/commands/startproject.py (new file, 1 line)
@@ -0,0 +1 @@
LOG_ENABLED = False
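This one-line module supplies per-command default settings, disabling logging while startproject runs. A hedged sketch of how such a module is folded into the defaults, mirroring update_default_settings() from the cmdline.py hunk above:

from scrapy.conf import settings
import scrapy.conf.commands.startproject as cmd_defaults

# Copy every public name from the per-command module into settings.defaults,
# so LOG_ENABLED becomes False for this command unless overridden elsewhere.
for k, v in vars(cmd_defaults).items():
    if not k.startswith("_"):
        settings.defaults[k] = v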
@@ -19,8 +19,8 @@ class SpiderManager(object):
    def __init__(self):
        self.loaded = False
        self.default_domain = None
        self.spider_modules = settings.getlist('SPIDER_MODULES')
        self.force_domain = None
        self.spider_modules = None

    def fromdomain(self, domain_name):
        return self.asdict().get(domain_name)

@@ -56,7 +56,10 @@ class SpiderManager(object):
        if not self.loaded:
            self.load()

    def load(self):
    def load(self, spider_modules=None):
        if spider_modules is None:
            spider_modules = settings.getlist('SPIDER_MODULES')
        self.spider_modules = spider_modules
        self._invaliddict = {}
        self._spiders = {}

@@ -75,7 +78,7 @@ class SpiderManager(object):
                # we can't use the log module here because it may not be available yet
                print "WARNING: Could not load spider %s: %s" % (spider, e)

    def reload(self, skip_domains=None):
    def reload(self, spider_modules=None, skip_domains=None):
        """Reload spiders by trying to discover any spiders added under the
        spiders module/packages, removes any spiders removed.

@@ -91,7 +94,7 @@ class SpiderManager(object):
            if not domain in skip_domains:
                reload(sys.modules[spider.__module__])
                reloaded += 1
        self.load() # second call to update spider instances
        self.load(spider_modules=spider_modules) # second call to update spider instances
        log.msg("Reloaded %d/%d scrapy spiders" % (reloaded, len(pdict)), level=log.DEBUG)

    def _getspiders(self, interface, package):

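The test changes later in this commit rely on this new signature. A minimal sketch of loading spiders from an explicit module list instead of mutating spider_modules and calling reload() (module name taken from the tests below):

from scrapy.spider import spiders

# Load (or re-load) spiders from an explicit list of modules; passing None keeps
# the old behaviour of reading the SPIDER_MODULES setting.
spiders.load(spider_modules=['scrapy.tests.test_spiders'])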
@@ -2,17 +2,15 @@ from __future__ import with_statement

from unittest import TestCase

from scrapy.spider import spiders
from scrapy.http import Response, Request
from scrapy.spider import BaseSpider
from scrapy.contrib.downloadermiddleware.cookies import CookiesMiddleware


class CookiesMiddlewareTest(TestCase):

    def setUp(self):
        spiders.spider_modules = ['scrapy.tests.test_spiders']
        spiders.reload()
        self.spider = spiders.fromdomain('scrapytest.org')
        self.spider = BaseSpider()
        self.mw = CookiesMiddleware()

    def tearDown(self):

@@ -3,7 +3,7 @@ from __future__ import with_statement
from unittest import TestCase
from os.path import join, abspath, dirname

from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Response, Request
from scrapy.contrib.downloadermiddleware.httpcompression import HttpCompressionMiddleware
from scrapy.tests import tests_datadir

@@ -20,9 +20,7 @@ FORMAT = {
class HttpCompressionTest(TestCase):

    def setUp(self):
        spiders.spider_modules = ['scrapy.tests.test_spiders']
        spiders.reload()
        self.spider = spiders.fromdomain('scrapytest.org')
        self.spider = BaseSpider()
        self.mw = HttpCompressionMiddleware()

    def _getresponse(self, coding):

@@ -1,16 +1,14 @@
import unittest

from scrapy.contrib.downloadermiddleware.redirect import RedirectMiddleware
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.core.exceptions import IgnoreRequest
from scrapy.http import Request, Response, Headers

class RedirectMiddlewareTest(unittest.TestCase):

    def setUp(self):
        spiders.spider_modules = ['scrapy.tests.test_spiders']
        spiders.reload()
        self.spider = spiders.fromdomain('scrapytest.org')
        self.spider = BaseSpider()
        self.mw = RedirectMiddleware()

    def test_priority_adjust(self):

@@ -5,14 +5,12 @@ from twisted.internet.error import TimeoutError as ServerTimeoutError, DNSLookup
    ConnectionLost

from scrapy.contrib.downloadermiddleware.retry import RetryMiddleware
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Request, Response

class RetryTest(unittest.TestCase):
    def setUp(self):
        spiders.spider_modules = ['scrapy.tests.test_spiders']
        spiders.reload()
        self.spider = spiders.fromdomain('scrapytest.org')
        self.spider = BaseSpider()
        self.mw = RetryMiddleware()
        self.mw.max_retry_times = 2

@@ -1,6 +1,6 @@
from unittest import TestCase

from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
from scrapy.conf import settings

@@ -9,9 +9,7 @@ from scrapy.conf import settings
class UserAgentMiddlewareTest(TestCase):

    def setUp(self):
        spiders.spider_modules = ['scrapy.tests.test_spiders']
        spiders.reload()
        self.spider = spiders.fromdomain('scrapytest.org')
        self.spider = BaseSpider()
        self.mw = UserAgentMiddleware()

    def tearDown(self):

@@ -45,8 +45,7 @@ class CrawlingSession(object):
        self.portno = self.port.getHost().port

        from scrapy.spider import spiders
        spiders.spider_modules = ['scrapy.tests.test_spiders']
        spiders.reload()
        spiders.load(['scrapy.tests.test_spiders'])

        self.spider = spiders.fromdomain(self.domain)
        if self.spider: