
Some changes to Scrapyd to support multiple configuration files, to make it easier to deploy Scrapyd applications. Also documented 'egg_runner' and 'application' options

--HG--
rename : debian/scrapyd.cfg => debian/000-default
rename : scrapyd/default_scrapyd.cfg => scrapyd/default_scrapyd.conf
Pablo Hoffman 2010-09-07 09:17:25 -03:00
parent 3414bf13ee
commit 9158e9d682
10 changed files with 64 additions and 28 deletions

View File

@@ -1,3 +1,3 @@
 usr/lib/python*/*-packages/scrapyd
-debian/scrapyd.cfg etc
+debian/000-default etc/scrapyd/conf.d
 extras/scrapyd.tac usr/share/scrapyd

View File

@@ -85,10 +85,10 @@ This will install Scrapyd in your Ubuntu server creating a ``scrapy`` user
 which Scrapyd will run as. It will also create some directories and files that
 are listed below:
-/etc/scrapyd.cfg
-~~~~~~~~~~~~~~~~
+/etc/scrapyd
+~~~~~~~~~~~~
-Scrapyd configuration file. See :ref:`topics-scrapyd-config`.
+Scrapyd configuration files. See :ref:`topics-scrapyd-config`.
 /var/log/scrapyd/scrapyd.log
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -124,7 +124,16 @@ Directory used to store data files (uploaded eggs and spider queues).
 Scrapyd Configuration file
 ==========================
-The Scrapyd configuration file supports the following options:
+Scrapyd searches for configuration files in the following locations, and parses
+them in order, with later ones taking precedence:
+* ``/etc/scrapyd/scrapyd.conf`` (Unix)
+* ``c:\scrapyd\scrapyd.conf`` (Windows)
+* ``/etc/scrapyd/conf.d/*`` (in alphabetical order, Unix)
+* ``scrapyd.conf``
+The configuration file supports the following options (see default values in
+the :ref:`example <topics-scrapyd-config-example>`).
 http_port
 ---------
@@ -160,12 +169,29 @@ logs_dir
 The directory where the Scrapy processes logs (``slotN.log``) will be stored.
+egg_runner
+----------
+The module that will be used for launching sub-processes. You can customize the
+Scrapy processes launched from Scrapyd by using your own module.
+application
+-----------
+A function that returns the (Twisted) Application object to use. This can be
+used if you want to extend Scrapyd by adding and removing your own components
+and services.
+For more info see the `Twisted Application Framework`_.
+.. _topics-scrapyd-config-example:
 Example configuration file
 --------------------------
 Here is an example configuration file with all the defaults:
-.. literalinclude:: ../../scrapyd/default_scrapyd.cfg
+.. literalinclude:: ../../scrapyd/default_scrapyd.conf
 Eggifying your project
 ======================
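The precedence described in the documentation hunk above comes straight from ConfigParser semantics: every source read later overwrites options already set by an earlier one. A minimal sketch of that behavior, in the same Python 2 style as the code in this commit (the 6801 override value is purely illustrative):

from cStringIO import StringIO
from ConfigParser import SafeConfigParser

packaged_defaults = "[scrapyd]\nhttp_port = 6800\n"  # like default_scrapyd.conf
local_override = "[scrapyd]\nhttp_port = 6801\n"     # a hypothetical local scrapyd.conf

cp = SafeConfigParser()
cp.readfp(StringIO(packaged_defaults))   # packaged defaults are loaded first
cp.readfp(StringIO(local_override))      # sources read later win
print cp.getint('scrapyd', 'http_port')  # -> 6801

Config._getsources() (further down in this commit) builds exactly such an ordered list, ending with the per-project scrapyd.conf so local settings always take effect last.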

View File

@@ -1,2 +1,2 @@
-from scrapyd.app import get_application
+from scrapyd import get_application
 application = get_application()

View File

@@ -0,0 +1,8 @@
+from scrapy.utils.misc import load_object
+from .config import Config
+def get_application():
+    config = Config()
+    apppath = config.get('application', 'scrapyd.app.application')
+    appfunc = load_object(apppath)
+    return appfunc()
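This new scrapyd/__init__.py is what makes the 'application' option documented above work: get_application() loads whatever callable the option names and calls it, expecting a Twisted Application object back. A hedged sketch of a custom factory such an option could point to; the extra service is hypothetical and not part of this commit:

from twisted.application.service import Service
from scrapyd.app import application as scrapyd_application

class HeartbeatService(Service):
    """Illustrative placeholder for whatever component you want to add."""

def application():
    app = scrapyd_application()               # build the stock Scrapyd application
    HeartbeatService().setServiceParent(app)  # attach the extra service to it
    return app

Wiring it in would only take a config line such as application = myproject.app.application, assuming the factory lives in a hypothetical myproject/app.py.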

View File

@@ -1,7 +1,4 @@
-import sys, os
-from twisted.internet import reactor
-from twisted.application.service import Application, Service
+from twisted.application.service import Application
 from twisted.application.internet import TimerService, TCPServer
 from twisted.web import server
@@ -14,8 +11,8 @@ from .environ import Environment
 from .webservice import Root
 from .config import Config
-def get_application():
-    app = Application("Scrapy")
+def application():
+    app = Application("Scrapyd")
     config = Config()
     http_port = config.getint('http_port', 6800)

View File

@@ -1,23 +1,27 @@
+import glob
 import pkgutil
 from cStringIO import StringIO
 from ConfigParser import SafeConfigParser, NoSectionError, NoOptionError
-from scrapy.utils.conf import get_sources
 class Config(object):
     """A ConfigParser wrapper to support defaults when calling instance
     methods, and also tied to a single section"""
-    SOURCES = ['scrapyd.cfg', '/etc/scrapyd.cfg']
     SECTION = 'scrapyd'
     def __init__(self):
-        sources = self.SOURCES + get_sources()
-        default_config = pkgutil.get_data(__package__, 'default_scrapyd.cfg')
+        sources = self._getsources()
+        default_config = pkgutil.get_data(__package__, 'default_scrapyd.conf')
         self.cp = SafeConfigParser()
         self.cp.readfp(StringIO(default_config))
         self.cp.read(sources)
+    def _getsources(self):
+        sources = ['/etc/scrapyd/scrapyd.conf', r'c:\scrapyd\scrapyd.conf']
+        sources += sorted(glob.glob('/etc/scrapyd/conf.d/*'))
+        sources += ['scrapyd.conf']
+        return sources
     def _getany(self, method, option, default):
         try:
             return method(self.SECTION, option)
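Both accessors used elsewhere in this commit go through the wrapper's _getany() fallback, so a missing option simply yields the supplied default. A short usage sketch:

from scrapyd.config import Config

config = Config()                        # merges defaults with every source found
port = config.getint('http_port', 6800)  # as used in scrapyd/app.py
app_path = config.get('application', 'scrapyd.app.application')  # as in scrapyd/__init__.py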

View File

@@ -1,8 +0,0 @@
-[scrapyd]
-eggs_dir = eggs
-logs_dir = logs
-dbs_dir = dbs
-max_proc = 0
-http_port = 6800
-debug = off
-egg_runner = scrapyd.eggrunner

View File

@@ -0,0 +1,9 @@
+[scrapyd]
+eggs_dir = eggs
+logs_dir = logs
+dbs_dir = dbs
+max_proc = 0
+http_port = 6800
+debug = off
+egg_runner = scrapyd.eggrunner
+application = scrapyd.app.application

View File

@@ -19,8 +19,8 @@ class Launcher(Service):
     def startService(self):
         for slot in range(self.max_proc):
            self._wait_for_project(slot)
-        log.msg("Launcher started: max_proc=%r, egg_runner=%r" % \
-            (self.max_proc, self.egg_runner), system="Launcher")
+        log.msg("%s started: max_proc=%r, egg_runner=%r" % (self.parent.name, \
+            self.max_proc, self.egg_runner), system="Launcher")
     def _wait_for_project(self, slot):
         poller = self.app.getComponent(IPoller)