
Some changes to Scrapyd to support multiple configuration files, to make it easier to deploy Scrapyd applications. Also documented 'egg_runner' and 'application' options

--HG--
rename : debian/scrapyd.cfg => debian/000-default
rename : scrapyd/default_scrapyd.cfg => scrapyd/default_scrapyd.conf
Pablo Hoffman 2010-09-07 09:17:25 -03:00
parent 3414bf13ee
commit 9158e9d682
10 changed files with 64 additions and 28 deletions

View File

@@ -1,3 +1,3 @@
 usr/lib/python*/*-packages/scrapyd
-debian/scrapyd.cfg etc
+debian/000-default etc/scrapyd/conf.d
 extras/scrapyd.tac usr/share/scrapyd

View File

@@ -85,10 +85,10 @@ This will install Scrapyd in your Ubuntu server creating a ``scrapy`` user
 which Scrapyd will run as. It will also create some directories and files that
 are listed below:
-/etc/scrapyd.cfg
-~~~~~~~~~~~~~~~~
+/etc/scrapyd
+~~~~~~~~~~~~
-Scrapyd configuration file. See :ref:`topics-scrapyd-config`.
+Scrapyd configuration files. See :ref:`topics-scrapyd-config`.
 /var/log/scrapyd/scrapyd.log
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -124,7 +124,16 @@ Directory used to store data files (uploaded eggs and spider queues).
 Scrapyd Configuration file
 ==========================
-The Scrapyd configuration file supports the following options:
+Scrapyd searches for configuration files in the following locations, and parses
+them in order, with later ones taking precedence:
+* ``/etc/scrapyd/scrapyd.conf`` (Unix)
+* ``c:\scrapyd\scrapyd.conf`` (Windows)
+* ``/etc/scrapyd/conf.d/*`` (in alphabetical order, Unix)
+* ``scrapyd.conf``
+The configuration file supports the following options (see default values in
+the :ref:`example <topics-scrapyd-config-example>`).
 http_port
 ---------
@@ -160,12 +169,29 @@ logs_dir
 The directory where the Scrapy processes logs (``slotN.log``) will be stored.
+egg_runner
+----------
+The module that will be used for launching sub-processes. You can customize the
+Scrapy processes launched from Scrapyd by using your own module.
+application
+-----------
+A function that returns the (Twisted) Application object to use. This can be
+used if you want to extend Scrapyd by adding and removing your own components
+and services.
+For more info see the `Twisted Application Framework`_.
+.. _topics-scrapyd-config-example:
 Example configuration file
 --------------------------
 Here is an example configuration file with all the defaults:
-.. literalinclude:: ../../scrapyd/default_scrapyd.cfg
+.. literalinclude:: ../../scrapyd/default_scrapyd.conf
 Eggifying your project
 ======================
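The precedence described in the documentation hunk above comes straight from ConfigParser semantics: every source read later overwrites options already set by an earlier one. A minimal sketch of that behavior, in the same Python 2 style as the code in this commit (the 6801 override value is purely illustrative):

from cStringIO import StringIO
from ConfigParser import SafeConfigParser

packaged_defaults = "[scrapyd]\nhttp_port = 6800\n"  # like default_scrapyd.conf
local_override = "[scrapyd]\nhttp_port = 6801\n"     # a hypothetical local scrapyd.conf

cp = SafeConfigParser()
cp.readfp(StringIO(packaged_defaults))   # packaged defaults are loaded first
cp.readfp(StringIO(local_override))      # sources read later win
print cp.getint('scrapyd', 'http_port')  # -> 6801

Config._getsources() (further down in this commit) builds exactly such an ordered list, ending with the per-project scrapyd.conf so local settings always take effect last.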

View File

@@ -1,2 +1,2 @@
-from scrapyd.app import get_application
+from scrapyd import get_application
 application = get_application()

View File

@@ -0,0 +1,8 @@
+from scrapy.utils.misc import load_object
+from .config import Config
+def get_application():
+    config = Config()
+    apppath = config.get('application', 'scrapyd.app.application')
+    appfunc = load_object(apppath)
+    return appfunc()
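This new scrapyd/__init__.py is what makes the 'application' option documented above work: get_application() loads whatever callable the option names and calls it, expecting a Twisted Application object back. A hedged sketch of a custom factory such an option could point to; the extra service is hypothetical and not part of this commit:

from twisted.application.service import Service
from scrapyd.app import application as scrapyd_application

class HeartbeatService(Service):
    """Illustrative placeholder for whatever component you want to add."""

def application():
    app = scrapyd_application()               # build the stock Scrapyd application
    HeartbeatService().setServiceParent(app)  # attach the extra service to it
    return app

Wiring it in would only take a config line such as application = myproject.app.application, assuming the factory lives in a hypothetical myproject/app.py.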

View File

@@ -1,7 +1,4 @@
-import sys, os
-from twisted.internet import reactor
-from twisted.application.service import Application, Service
+from twisted.application.service import Application
 from twisted.application.internet import TimerService, TCPServer
 from twisted.web import server
@@ -14,8 +11,8 @@ from .environ import Environment
 from .webservice import Root
 from .config import Config
-def get_application():
-    app = Application("Scrapy")
+def application():
+    app = Application("Scrapyd")
     config = Config()
     http_port = config.getint('http_port', 6800)

View File

@@ -1,23 +1,27 @@
+import glob
 import pkgutil
 from cStringIO import StringIO
 from ConfigParser import SafeConfigParser, NoSectionError, NoOptionError
-from scrapy.utils.conf import get_sources
 class Config(object):
     """A ConfigParser wrapper to support defaults when calling instance
     methods, and also tied to a single section"""
-    SOURCES = ['scrapyd.cfg', '/etc/scrapyd.cfg']
     SECTION = 'scrapyd'
     def __init__(self):
-        sources = self.SOURCES + get_sources()
-        default_config = pkgutil.get_data(__package__, 'default_scrapyd.cfg')
+        sources = self._getsources()
+        default_config = pkgutil.get_data(__package__, 'default_scrapyd.conf')
         self.cp = SafeConfigParser()
         self.cp.readfp(StringIO(default_config))
         self.cp.read(sources)
+    def _getsources(self):
+        sources = ['/etc/scrapyd/scrapyd.conf', r'c:\scrapyd\scrapyd.conf']
+        sources += sorted(glob.glob('/etc/scrapyd/conf.d/*'))
+        sources += ['scrapyd.conf']
+        return sources
     def _getany(self, method, option, default):
         try:
             return method(self.SECTION, option)
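Both accessors used elsewhere in this commit go through the wrapper's _getany() fallback, so a missing option simply yields the supplied default. A short usage sketch:

from scrapyd.config import Config

config = Config()                        # merges defaults with every source found
port = config.getint('http_port', 6800)  # as used in scrapyd/app.py
app_path = config.get('application', 'scrapyd.app.application')  # as in scrapyd/__init__.py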

View File

@@ -1,8 +0,0 @@
-[scrapyd]
-eggs_dir = eggs
-logs_dir = logs
-dbs_dir = dbs
-max_proc = 0
-http_port = 6800
-debug = off
-egg_runner = scrapyd.eggrunner

View File

@@ -0,0 +1,9 @@
+[scrapyd]
+eggs_dir = eggs
+logs_dir = logs
+dbs_dir = dbs
+max_proc = 0
+http_port = 6800
+debug = off
+egg_runner = scrapyd.eggrunner
+application = scrapyd.app.application

View File

@@ -19,8 +19,8 @@ class Launcher(Service):
     def startService(self):
         for slot in range(self.max_proc):
            self._wait_for_project(slot)
-        log.msg("Launcher started: max_proc=%r, egg_runner=%r" % \
-            (self.max_proc, self.egg_runner), system="Launcher")
+        log.msg("%s started: max_proc=%r, egg_runner=%r" % (self.parent.name, \
+            self.max_proc, self.egg_runner), system="Launcher")
     def _wait_for_project(self, slot):
         poller = self.app.getComponent(IPoller)