mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 09:04:16 +00:00
Support running projects without eggs in Scrapyd. Closes #238
This commit is contained in:
parent
b76cd42690
commit
833baa6041
@ -1,27 +1,9 @@
|
||||
"""
|
||||
This module can be used to run a Scrapy project contained in an egg file
|
||||
|
||||
To see all spiders in a project:
|
||||
|
||||
python -m scrapyd.eggrunner myproject.egg list
|
||||
|
||||
To crawl a spider:
|
||||
|
||||
python -m scrapyd.eggrunner myproject.egg crawl somespider
|
||||
"""
|
||||
|
||||
import sys
|
||||
import sys, os
|
||||
|
||||
from scrapyd.eggutils import activate_egg
|
||||
|
||||
def main(eggpath, args):
|
||||
"""Run scrapy for the settings module name passed"""
|
||||
eggpath = os.environ.get('SCRAPY_EGGFILE')
|
||||
if eggpath:
|
||||
activate_egg(eggpath)
|
||||
from scrapy.cmdline import execute
|
||||
execute(['scrapy'] + list(args))
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) < 2:
|
||||
print "usage: %s <eggfile> [scrapy_command args ...]" % sys.argv[0]
|
||||
sys.exit(1)
|
||||
main(sys.argv[1], sys.argv[2:])
|
||||
from scrapy.cmdline import execute
|
||||
execute()
|
||||
|
@ -26,7 +26,10 @@ class FilesystemEggStorage(object):
|
||||
|
||||
def get(self, project, version=None):
|
||||
if version is None:
|
||||
try:
|
||||
version = self.list(project)[-1]
|
||||
except IndexError:
|
||||
return None, None
|
||||
return version, open(self._eggpath(project, version), 'rb')
|
||||
|
||||
def list(self, project):
|
||||
|
@ -12,9 +12,10 @@ def get_spider_list_from_eggfile(eggfile, project):
|
||||
shutil.copyfileobj(eggfile, f)
|
||||
f.flush()
|
||||
eggfile.seek(0)
|
||||
pargs = [sys.executable, '-m', 'scrapyd.eggrunner', f.name, 'list']
|
||||
pargs = [sys.executable, '-m', 'scrapyd.eggrunner', 'list']
|
||||
env = os.environ.copy()
|
||||
env['SCRAPY_PROJECT'] = project
|
||||
env['SCRAPY_EGGFILE'] = f.name
|
||||
proc = Popen(pargs, stdout=PIPE, cwd=tmpdir, env=env)
|
||||
out = proc.communicate()[0]
|
||||
return out.splitlines()
|
||||
|
@ -11,11 +11,19 @@ class Environment(object):
|
||||
def __init__(self, config):
|
||||
self.dbs_dir = config.get('dbs_dir', 'dbs')
|
||||
self.logs_dir = config.get('logs_dir', 'logs')
|
||||
if config.cp.has_section('settings'):
|
||||
self.settings = dict(config.cp.items('settings'))
|
||||
else:
|
||||
self.settings = {}
|
||||
|
||||
def get_environment(self, message, slot):
|
||||
def get_environment(self, message, slot, eggpath):
|
||||
project = message['project']
|
||||
env = os.environ.copy()
|
||||
env['SCRAPY_PROJECT'] = project
|
||||
if eggpath:
|
||||
env['SCRAPY_EGGFILE'] = eggpath
|
||||
elif project in self.settings:
|
||||
env['SCRAPY_SETTINGS_MODULE'] = self.settings[project]
|
||||
dbpath = os.path.join(self.dbs_dir, '%s.db' % project)
|
||||
env['SCRAPY_SQLITE_DB'] = dbpath
|
||||
logpath = os.path.join(self.logs_dir, 'slot%s.log' % slot)
|
||||
|
@ -10,7 +10,8 @@ class IEggStorage(Interface):
|
||||
def get(project, version=None):
|
||||
"""Return a tuple (version, file) with the egg for the specified
|
||||
project and version. If version is None, the latest version is
|
||||
returned."""
|
||||
returned. If no egg is found for the given project/version, (None, None)
|
||||
should be returned."""
|
||||
|
||||
def list(project):
|
||||
"""Return the list of versions which have eggs stored (for the given
|
||||
@ -61,9 +62,13 @@ class ISpiderScheduler(Interface):
|
||||
class IEnvironment(Interface):
|
||||
"""A component to generate the environment of crawler processes"""
|
||||
|
||||
def get_environment(message, slot):
|
||||
def get_environment(message, slot, eggpath):
|
||||
"""Return the environment variables to use for running the process.
|
||||
|
||||
`message` is the message received from the IPoller.next()
|
||||
`slot` is the Launcher slot where the process will be running.
|
||||
`eggpath` is the path to an eggfile that contains the project code. The
|
||||
`eggpath` may be `None` if no egg was found for the project, in
|
||||
which case the project must be on the python path and its settings
|
||||
defined in the scrapyd.conf [settings] section.
|
||||
"""
|
||||
|
@ -39,14 +39,15 @@ class Launcher(Service):
|
||||
def _spawn_process(self, message, slot):
|
||||
project = message['project']
|
||||
eggpath = self._get_eggpath(project)
|
||||
args = [sys.executable, '-m', self.egg_runner, eggpath, 'crawl']
|
||||
args = [sys.executable, '-m', self.egg_runner, 'crawl']
|
||||
e = self.app.getComponent(IEnvironment)
|
||||
env = e.get_environment(message, slot)
|
||||
env = e.get_environment(message, slot, eggpath)
|
||||
pp = ScrapyProcessProtocol(eggpath, slot)
|
||||
pp.deferred.addBoth(self._process_finished, eggpath, slot)
|
||||
reactor.spawnProcess(pp, sys.executable, args=args, env=env)
|
||||
|
||||
def _process_finished(self, _, eggpath, slot):
|
||||
if eggpath:
|
||||
os.remove(eggpath)
|
||||
self._wait_for_project(slot)
|
||||
|
||||
|
@ -14,15 +14,29 @@ class EggStorageTest(unittest.TestCase):
|
||||
d = self.mktemp()
|
||||
os.mkdir(d)
|
||||
config = Config(values={'eggs_dir': d, 'logs_dir': d})
|
||||
config.cp.add_section('settings')
|
||||
config.cp.set('settings', 'newbot', 'newbot.settings')
|
||||
self.environ = Environment(config)
|
||||
|
||||
def test_interface(self):
|
||||
verifyObject(IEnvironment, self.environ)
|
||||
|
||||
def test_get_environment(self):
|
||||
def test_get_environment_with_eggfile(self):
|
||||
msg = {'project': 'mybot'}
|
||||
slot = 3
|
||||
env = self.environ.get_environment(msg, slot)
|
||||
env = self.environ.get_environment(msg, slot, '/path/to/file.egg')
|
||||
self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
|
||||
self.assert_(env['SCRAPY_SQLITE_DB'].endswith('mybot.db'))
|
||||
self.assert_(env['SCRAPY_LOG_FILE'].endswith('slot3.log'))
|
||||
self.assert_(env['SCRAPY_EGGFILE'].endswith('/path/to/file.egg'))
|
||||
self.failIf('SCRAPY_SETTINGS_MODULE' in env)
|
||||
|
||||
def test_get_environment_without_eggfile(self):
|
||||
msg = {'project': 'newbot'}
|
||||
slot = 3
|
||||
env = self.environ.get_environment(msg, slot, None)
|
||||
self.assertEqual(env['SCRAPY_PROJECT'], 'newbot')
|
||||
self.assert_(env['SCRAPY_SQLITE_DB'].endswith('newbot.db'))
|
||||
self.assert_(env['SCRAPY_LOG_FILE'].endswith('slot3.log'))
|
||||
self.assertEqual(env['SCRAPY_SETTINGS_MODULE'], 'newbot.settings')
|
||||
self.failIf('SCRAPY_EGGFILE' in env)
|
||||
|
Loading…
x
Reference in New Issue
Block a user