"""Twisted web resources for the Scrapyd monitoring UI (home and jobs pages)."""

from datetime import datetime

from twisted.web import resource, static
from twisted.application.service import IServiceCollection

from scrapy.utils.misc import load_object

from .interfaces import IPoller, IEggStorage, ISpiderScheduler
from . import webservice


class Root(resource.Resource):
    """Top-level resource: registers the UI pages and the configured services."""

    def __init__(self, config, app):
        """Register child resources, then refresh the project list.

        config -- configuration object; read through ``getboolean``, ``get``
            and ``items`` (options ``debug``, ``runner``, ``logs_dir``,
            ``items_dir`` and the ``services`` section).
        app -- application object used to look up the launcher service and
            the poller / scheduler / egg-storage components.
        """
        resource.Resource.__init__(self)
        self.debug = config.getboolean('debug', False)
        self.runner = config.get('runner')
        logsdir = config.get('logs_dir')
        itemsdir = config.get('items_dir')
        self.app = app
        self.putChild('', Home(self))
        self.putChild('logs', static.File(logsdir, 'text/plain'))
        self.putChild('items', static.File(itemsdir, 'text/plain'))
        self.putChild('jobs', Jobs(self))
        # Every entry of the "services" section maps a child name to an
        # object path; the loaded class is instantiated with this root.
        for serv_name, serv_cls_name in config.items('services', ()):
            serv_cls = load_object(serv_cls_name)
            self.putChild(serv_name, serv_cls(self))
        self.update_projects()

    def update_projects(self):
        """Ask the poller and then the scheduler to update their projects."""
        self.poller.update_projects()
        self.scheduler.update_projects()

    @property
    def launcher(self):
        """The service named 'launcher' in the application's collection."""
        app = IServiceCollection(self.app, self.app)
        return app.getServiceNamed('launcher')

    @property
    def scheduler(self):
        """The application's ISpiderScheduler component."""
        return self.app.getComponent(ISpiderScheduler)

    @property
    def eggstorage(self):
        """The application's IEggStorage component."""
        return self.app.getComponent(IEggStorage)

    @property
    def poller(self):
        """The application's IPoller component."""
        return self.app.getComponent(IPoller)


class Home(resource.Resource):
    """Landing page: lists the available projects and explains the API."""

    def __init__(self, root):
        resource.Resource.__init__(self)
        self.root = root

    def render_GET(self, txrequest):
        """Return the home page with the scheduler's project names filled in."""
        # Renamed from ``vars`` so the builtin is not shadowed.
        context = {
            'projects': ', '.join(self.root.scheduler.list_projects()),
        }
        # NOTE(review): the markup of this page was lost from the file; only
        # structural tags are restored here.  Any anchors (e.g. around "curl"
        # or "Scrapyd documentation") or navigation list the page used to
        # have could not be recovered -- confirm against version control.
        return """
<html>
<head><title>Scrapyd</title></head>
<body>
<h1>Scrapyd</h1>
<p>Available projects: %(projects)s</p>

<h2>How to schedule a spider?</h2>

<p>To schedule a spider you need to use the API (this web UI is only for monitoring)</p>

<p>Example using curl:</p>
<p><code>curl http://localhost:6800/schedule.json -d project=default -d spider=somespider</code></p>

<p>For more information about the API, see the Scrapyd documentation</p>
</body>
</html>
""" % context


class Jobs(resource.Resource):
    """Page showing pending, running and finished jobs in a single table."""

    def __init__(self, root):
        resource.Resource.__init__(self)
        self.root = root

    @staticmethod
    def _link_cells(proc):
        """Return the "Log" and "Items" cells for process *proc*."""
        ident = (proc.project, proc.spider, proc.job)
        # NOTE(review): the original hrefs were lost; these targets assume
        # files laid out as <project>/<spider>/<job>.log and .jl beneath the
        # 'logs' and 'items' children registered by Root -- confirm.
        return (
            "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>" % ident
            + "<td><a href='/items/%s/%s/%s.jl'>Items</a></td>" % ident
        )

    def render(self, txrequest):
        """Return the jobs page as one HTML string.

        Rows come from three sources: messages listed by each poller queue
        (pending), the launcher's ``processes`` (running) and the launcher's
        ``finished`` list.
        """
        # Every "%" below needs a matching placeholder in its format string;
        # the placeholder-less strings previously here raised TypeError.
        # NOTE(review): values are interpolated without HTML escaping.
        cell = "<td>%s</td>"
        section = "<tr><th colspan='7'>%s</th></tr>"  # spans all 7 columns

        out = [
            "<html><head><title>Scrapyd</title></head>",
            "<body>",
            "<h1>Jobs</h1>",
            # NOTE(review): link target reconstructed -- confirm.
            "<p><a href='..'>Go back</a></p>",
            "<table>",
            "<tr><th>Project</th><th>Spider</th><th>Job</th><th>PID</th>"
            "<th>Runtime</th><th>Log</th><th>Items</th></tr>",
        ]

        out.append(section % "Pending")
        for project, queue in self.root.poller.queues.items():
            for msg in queue.list():
                out.append("<tr>")
                out.append(cell % project)
                out.append(cell % str(msg['name']))
                out.append(cell % str(msg['_job']))
                out.append("</tr>")

        out.append(section % "Running")
        for proc in self.root.launcher.processes.values():
            out.append("<tr>")
            for attr in ['project', 'spider', 'job', 'pid']:
                out.append(cell % getattr(proc, attr))
            # Still running, so the runtime is measured up to now.
            out.append(cell % (datetime.now() - proc.start_time))
            out.append(self._link_cells(proc))
            out.append("</tr>")

        out.append(section % "Finished")
        for proc in self.root.launcher.finished:
            out.append("<tr>")
            for attr in ['project', 'spider', 'job']:
                out.append(cell % getattr(proc, attr))
            out.append("<td></td>")  # PID column is left empty here
            out.append(cell % (proc.end_time - proc.start_time))
            out.append(self._link_cells(proc))
            out.append("</tr>")

        out.append("</table>")
        out.append("</body>")
        out.append("</html>")
        return "".join(out)