from datetime import datetime from twisted.web import resource, static from twisted.application.service import IServiceCollection from scrapy.utils.misc import load_object from .interfaces import IPoller, IEggStorage, ISpiderScheduler from . import webservice class Root(resource.Resource): def __init__(self, config, app): resource.Resource.__init__(self) self.debug = config.getboolean('debug', False) self.runner = config.get('runner') logsdir = config.get('logs_dir') itemsdir = config.get('items_dir') self.app = app self.putChild('', Home(self)) self.putChild('logs', static.File(logsdir, 'text/plain')) self.putChild('items', static.File(itemsdir, 'text/plain')) self.putChild('jobs', Jobs(self)) services = config.items('services', ()) for servName, servClsName in services: servCls = load_object(servClsName) self.putChild(servName, servCls(self)) self.update_projects() def update_projects(self): self.poller.update_projects() self.scheduler.update_projects() @property def launcher(self): app = IServiceCollection(self.app, self.app) return app.getServiceNamed('launcher') @property def scheduler(self): return self.app.getComponent(ISpiderScheduler) @property def eggstorage(self): return self.app.getComponent(IEggStorage) @property def poller(self): return self.app.getComponent(IPoller) class Home(resource.Resource): def __init__(self, root): resource.Resource.__init__(self) self.root = root def render_GET(self, txrequest): vars = { 'projects': ', '.join(self.root.scheduler.list_projects()), } return """
Available projects: %(projects)s
To schedule a spider you need to use the API (this web UI is only for monitoring)
Example using curl:
curl http://localhost:6800/schedule.json -d project=default -d spider=somespider
For more information about the API, see the Scrapyd documentation
""" % vars class Jobs(resource.Resource): def __init__(self, root): resource.Resource.__init__(self) self.root = root def render(self, txrequest): s = "Project | Spider | Job | PID | Runtime | Log | Items | " s += "
---|---|---|---|---|---|---|
Pending | ||||||
%s | " % project s += "%s | " % str(m['name']) s += "%s | " % str(m['_job']) s += "||||
Running | ||||||
%s | " % getattr(p, a) s += "%s | " % (datetime.now() - p.start_time) s += "Log | " % (p.project, p.spider, p.job) s += "Items | " % (p.project, p.spider, p.job) s += "|||
Finished | ||||||
%s | " % getattr(p, a) s += "" s += " | %s | " % (p.end_time - p.start_time) s += "Log | " % (p.project, p.spider, p.job) s += "Items | " % (p.project, p.spider, p.job) s += "