mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 18:04:11 +00:00
* multiple projects * uploading scrapy projects as Python eggs * scheduling spiders using a JSON API Documentation is added along with the code. Closes #218. --HG-- rename : debian/scrapy-service.default => debian/scrapyd.default rename : debian/scrapy-service.dirs => debian/scrapyd.dirs rename : debian/scrapy-service.install => debian/scrapyd.install rename : debian/scrapy-service.lintian-overrides => debian/scrapyd.lintian-overrides rename : debian/scrapy-service.postinst => debian/scrapyd.postinst rename : debian/scrapy-service.postrm => debian/scrapyd.postrm rename : debian/scrapy-service.upstart => debian/scrapyd.upstart rename : extras/scrapy.tac => extras/scrapyd.tac
33 lines
1.3 KiB
Python
33 lines
1.3 KiB
Python
import os, sys, shutil, pkg_resources
|
|
from subprocess import Popen, PIPE
|
|
from tempfile import NamedTemporaryFile, mkdtemp
|
|
|
|
def get_spider_list_from_eggfile(eggfile, project):
    """Return the spider names listed by running an egg in a child process.

    The contents of ``eggfile`` are copied into a temporary ``.egg`` file,
    ``scrapyd.eggrunner`` is executed against that copy with the ``list``
    argument, and every line the child prints to stdout becomes one entry
    of the result. ``eggfile`` is rewound to its beginning once it has been
    copied, so the caller can read it again afterwards.

    :param eggfile: readable, seekable file-like object holding the egg
    :param project: value exported to the child as ``SCRAPY_PROJECT``
    :return: list with the lines written to stdout by the child process
    """
    # FIXME: the child is run inside a throw-away directory to avoid
    # permission problems when running as a system service, because the
    # "scrapy list" command tries to write the scrapy.db sqlite database in
    # the current directory.
    workdir = mkdtemp()
    try:
        with NamedTemporaryFile(suffix='.egg', dir=workdir) as egg_copy:
            shutil.copyfileobj(eggfile, egg_copy)
            # Make sure the bytes are on disk before the child opens the file.
            egg_copy.flush()
            eggfile.seek(0)
            child_env = dict(os.environ, SCRAPY_PROJECT=project)
            command = [
                sys.executable, '-m', 'scrapyd.eggrunner',
                egg_copy.name, 'list',
            ]
            child = Popen(command, stdout=PIPE, cwd=workdir, env=child_env)
            stdout_data, _ = child.communicate()
            return stdout_data.splitlines()
    finally:
        # Always remove the scratch directory, even if the child failed.
        shutil.rmtree(workdir)
|
|
|
|
def activate_egg(eggpath):
    """Activate a Scrapy egg file. This is meant to be used from egg runners
    to activate a Scrapy egg file. Don't use it from other code as it may
    leave unwanted side effects.

    The first distribution that ``pkg_resources`` finds at ``eggpath`` is
    activated, and the module named by its ``settings`` entry point in the
    ``scrapy`` group is stored in the ``SCRAPY_SETTINGS_MODULE`` environment
    variable of the current process.

    :param eggpath: path passed to ``pkg_resources.find_distributions``
    :raises StopIteration: if no distribution is found at ``eggpath``
    """
    # Use the next() builtin instead of the iterator's .next() method: the
    # method only exists on Python 2 iterators, while the builtin works on
    # Python 2.6+ and Python 3 alike.
    d = next(pkg_resources.find_distributions(eggpath))
    d.activate()
    settings_module = d.get_entry_info('scrapy', 'settings').module_name
    # Overwrites any previously configured settings module for this process.
    os.environ['SCRAPY_SETTINGS_MODULE'] = settings_module
|