2010-11-30 02:26:31 -02:00
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
from twisted.web import resource, static
|
|
|
|
from twisted.application.service import IServiceCollection
|
|
|
|
from .interfaces import IPoller, IEggStorage, ISpiderScheduler
|
|
|
|
|
|
|
|
from . import webservice
|
|
|
|
|
|
|
|
class Root(resource.Resource):
    """Top-level web resource that wires up the UI pages and JSON endpoints.

    On construction it reads ``debug``, ``runner`` and ``logs_dir`` from
    ``config``, keeps a reference to ``app``, registers every child resource
    and finally refreshes the project lists of the poller and the scheduler.
    """

    def __init__(self, config, app):
        """Build the resource tree.

        ``config`` must provide ``getboolean`` and ``get``; ``app`` is the
        application object that the component properties below look up on.
        """
        resource.Resource.__init__(self)
        self.debug = config.getboolean('debug', False)
        self.runner = config.get('runner')
        logs_path = config.get('logs_dir')
        self.app = app

        self.putChild('', Home(self))

        # JSON API endpoints, registered in this exact order; each resource
        # is constructed with this Root instance as its only argument.
        json_endpoints = (
            ('schedule.json', webservice.Schedule),
            ('addversion.json', webservice.AddVersion),
            ('listprojects.json', webservice.ListProjects),
            ('listversions.json', webservice.ListVersions),
            ('listspiders.json', webservice.ListSpiders),
            ('delproject.json', webservice.DeleteProject),
            ('delversion.json', webservice.DeleteVersion),
            ('listjobs.json', webservice.ListJobs),
        )
        for path, factory in json_endpoints:
            self.putChild(path, factory(self))

        # Log files are served straight from disk as plain text.
        self.putChild('logs', static.File(logs_path, 'text/plain'))
        self.putChild('procmon', ProcessMonitor(self))

        self.update_projects()

    def update_projects(self):
        """Ask the poller, then the scheduler, to refresh their projects."""
        self.poller.update_projects()
        self.scheduler.update_projects()

    @property
    def launcher(self):
        """Service named ``'launcher'`` from the app's service collection."""
        services = IServiceCollection(self.app, self.app)
        return services.getServiceNamed('launcher')

    @property
    def scheduler(self):
        """Component registered on the app for ``ISpiderScheduler``."""
        return self.app.getComponent(ISpiderScheduler)

    @property
    def eggstorage(self):
        """Component registered on the app for ``IEggStorage``."""
        return self.app.getComponent(IEggStorage)

    @property
    def poller(self):
        """Component registered on the app for ``IPoller``."""
        return self.app.getComponent(IPoller)
|
|
|
|
|
|
|
|
|
|
|
|
class Home(resource.Resource):
    """Landing page: lists the available projects and links to other pages."""

    def __init__(self, root):
        """Keep a reference to ``root``, whose scheduler is queried on GET."""
        resource.Resource.__init__(self)
        self.root = root

    def render_GET(self, txrequest):
        """Return the home page markup.

        The only dynamic part is the comma-separated list of project names
        obtained from ``self.root.scheduler.list_projects()``; ``txrequest``
        is not inspected.
        """
        # Named ``context`` rather than ``vars`` to avoid shadowing the builtin.
        context = {
            'projects': ', '.join(self.root.scheduler.list_projects()),
        }
        # The "Logs" entry previously lacked its closing </a>, which left the
        # anchor open and produced malformed markup.
        return """
<html>
<head><title>Scrapyd</title></head>
<body>
<h1>Scrapyd</h1>
<p>Available projects: <b>%(projects)s</b></p>
<ul>
<li><a href="/procmon">Process monitor</a></li>
<li><a href="/logs/">Logs</a></li>
<li><a href="http://doc.scrapy.org/topics/scrapyd.html">Documentation</a></li>
</ul>

<h2>How to schedule a spider?</h2>

<p>To schedule a spider you need to use the API (this web UI is only for
monitoring)</p>

<p>Example using <a href="http://curl.haxx.se/">curl</a>:</p>
<p><code>curl http://localhost:6800/schedule.json -d project=default -d spider=somespider</code></p>

<p>For more information about the API, see the <a href="http://doc.scrapy.org/topics/scrapyd.html">Scrapyd documentation</a></p>
</body>
</html>
""" % context
|
2010-11-30 02:26:31 -02:00
|
|
|
|
|
|
|
|
|
|
|
class ProcessMonitor(resource.Resource):
    """Page showing one table row per process known to the launcher."""

    def __init__(self, root):
        """Keep a reference to ``root``, whose launcher is read on render."""
        resource.Resource.__init__(self)
        self.root = root

    def render(self, txrequest):
        """Return the process-monitor page markup.

        Each row shows the process' project, spider, job and pid, the time
        elapsed since ``start_time`` and a link to its log file.
        ``txrequest`` is not inspected.
        """
        # Collect fragments and join once instead of growing a string with
        # ``+=`` inside nested loops.
        parts = [
            # The header previously closed <title> twice and never closed
            # <head>; the second tag is now </head>.
            "<html><head><title>Scrapyd</title></head>",
            "<body>",
            "<h1>Process monitor</h1>",
            "<p><a href='..'>Go back</a></p>",
            "<table border='1'>",
            "<tr>",
            "<th>Project</th><th>Spider</th><th>Job</th><th>PID</th><th>Runtime</th><th>Log</th>",
            "</tr>",
        ]
        # NOTE(review): process fields are interpolated into the markup
        # without HTML escaping -- confirm they can never contain markup.
        for proc in self.root.launcher.processes.values():
            parts.append("<tr>")
            for attr in ('project', 'spider', 'job', 'pid'):
                parts.append("<td>%s</td>" % getattr(proc, attr))
            # Runtime is the difference between now and the start time.
            parts.append("<td>%s</td>" % (datetime.now() - proc.start_time))
            parts.append(
                "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>"
                % (proc.project, proc.spider, proc.job)
            )
            parts.append("</tr>")
        parts.extend(("</table>", "</body>", "</html>"))
        return "".join(parts)
|
|
|
|
|