mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 01:43:50 +00:00
62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
import sys
|
|
import os
|
|
from subprocess import Popen, PIPE
|
|
from ConfigParser import NoSectionError
|
|
|
|
from scrapy.spiderqueue import SqliteSpiderQueue
|
|
from scrapy.utils.python import stringify_dict, unicode_to_str
|
|
from scrapyd.config import Config
|
|
|
|
def get_spider_queues(config):
    """Return a dict of spider queues keyed by project name.

    The directory named by the ``dbs_dir`` config option (``'dbs'`` when the
    option is unset) is created first if it does not exist. Every project
    reported by ``get_project_list(config)`` is then mapped to a
    ``SqliteSpiderQueue`` constructed with the path
    ``<dbs_dir>/<project>.db``.
    """
    dbs_dir = config.get('dbs_dir', 'dbs')
    if not os.path.exists(dbs_dir):
        os.makedirs(dbs_dir)
    # One queue per project; a repeated project name simply overwrites the
    # earlier entry, exactly as repeated item assignment would.
    return dict(
        (name, SqliteSpiderQueue(os.path.join(dbs_dir, '%s.db' % name)))
        for name in get_project_list(config)
    )
|
|
|
|
def get_project_list(config):
    """Return the list of known project names.

    Names are collected from two sources, in this order:

    1. the entries of the directory given by the ``eggs_dir`` config option
       (``'eggs'`` when unset), skipped if that directory does not exist;
    2. the option names of the ``[settings]`` section of the underlying
       config parser (``config.cp``), skipped if the section is missing.
    """
    eggs_dir = config.get('eggs_dir', 'eggs')
    names = os.listdir(eggs_dir) if os.path.exists(eggs_dir) else []
    try:
        names.extend([item[0] for item in config.cp.items('settings')])
    except NoSectionError:
        # No [settings] section: only the eggs dir contributes projects.
        pass
    return names
|
|
|
|
def get_crawl_args(message):
    """Return the command-line arguments for the scrapy crawl process that
    will be started for this message.

    The first argument is the spider name taken from ``message['_spider']``.
    The ``'_project'`` and ``'_spider'`` keys are then dropped and every
    remaining entry is appended as a ``-a key=value`` pair. ``message``
    itself is not modified; a copy is used.
    """
    spider_args = message.copy()
    cmdline = [unicode_to_str(spider_args['_spider'])]
    del spider_args['_project'], spider_args['_spider']
    stringified = stringify_dict(spider_args, keys_only=False)
    for name, value in stringified.items():
        cmdline.extend(('-a', '%s=%s' % (name, value)))
    return cmdline
|
|
|
|
def get_spider_list(project, runner=None):
    """Return the spider list of the given project, using the given runner.

    Runs ``<python> -m <runner> list`` in a child process whose environment
    is a copy of the current one with ``SCRAPY_PROJECT`` set to ``project``.
    When ``runner`` is None it is read from the ``runner`` option of a fresh
    ``Config()``.

    Returns the child's standard output split into lines. If the child exits
    with a non-zero status, raises ``RuntimeError`` carrying the last line of
    its stderr (or of its stdout when stderr is empty, or ``'unknown error'``
    when both are empty).
    """
    if runner is None:
        runner = Config().get('runner')
    child_env = dict(os.environ, SCRAPY_PROJECT=project)
    command = [sys.executable, '-m', runner, 'list']
    child = Popen(command, stdout=PIPE, stderr=PIPE, env=child_env)
    stdout, stderr = child.communicate()
    if not child.returncode:
        return stdout.splitlines()
    failure = stderr or stdout or 'unknown error'
    raise RuntimeError(failure.splitlines()[-1])
|
|
|