Mirror of https://github.com/scrapy/scrapy.git
Split stats collector classes from the stats collection facility (#204)
* moved scrapy.stats.collector.__init__ module to scrapy.statscol
* moved scrapy.stats.collector.simpledb module to scrapy.contrib.statscol
* moved signals from scrapy.stats.signals to scrapy.signals
* moved scrapy/stats/__init__.py to scrapy/stats.py
* updated documentation and tests accordingly

--HG--
rename : scrapy/stats/collector/simpledb.py => scrapy/contrib/statscol.py
rename : scrapy/stats/__init__.py => scrapy/stats.py
rename : scrapy/stats/collector/__init__.py => scrapy/statscol.py
This commit is contained in:
parent c276c48c91
commit 053d45e79f
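For code that used these modules, the commit amounts to a set of import-path renames. A minimal before/after sketch (module and class names taken from the renames and hunks below; it assumes boto is installed for the SimpleDB collector):

    # before this commit:
    #   from scrapy.stats.collector import MemoryStatsCollector
    #   from scrapy.stats.collector.simpledb import SimpledbStatsCollector
    #   from scrapy.stats.signals import stats_spider_opened, stats_spider_closed

    # after this commit:
    from scrapy.statscol import MemoryStatsCollector
    from scrapy.contrib.statscol import SimpledbStatsCollector
    from scrapy.signals import stats_spider_opened, stats_spider_closed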
@@ -934,7 +934,7 @@ Example::
 STATS_CLASS
 -----------
 
-Default: ``'scrapy.stats.collector.MemoryStatsCollector'``
+Default: ``'scrapy.statscol.MemoryStatsCollector'``
 
 The class to use for collecting stats (must implement the Stats Collector API,
 or subclass the StatsCollector class).
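The documented contract (implement the Stats Collector API or subclass StatsCollector) can be read as: point STATS_CLASS at any subclass. A hedged sketch, where the myproject paths are placeholders and _persist_stats is assumed to be the intended override point, as the SimpledbStatsCollector hunks below suggest:

    # myproject/statscollectors.py -- hypothetical module, shown only to illustrate the contract
    from scrapy import log
    from scrapy.statscol import StatsCollector

    class LoggingStatsCollector(StatsCollector):
        # _persist_stats is the same hook SimpledbStatsCollector overrides later in this diff
        def _persist_stats(self, stats, spider=None):
            log.msg("stats: %r" % (stats,))

    # myproject/settings.py
    STATS_CLASS = 'myproject.statscollectors.LoggingStatsCollector'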
@@ -101,11 +101,11 @@ Stats Collector API
 ===================
 
 There are several Stats Collectors available under the
-:mod:`scrapy.stats.collector` module and they all implement the Stats
-Collector API defined by the :class:`~scrapy.stats.collector.StatsCollector`
+:mod:`scrapy.statscol` module and they all implement the Stats
+Collector API defined by the :class:`~scrapy.statscol.StatsCollector`
 class (which they all inherit from).
 
-.. module:: scrapy.stats.collector
+.. module:: scrapy.statscol
    :synopsis: Basic Stats Collectors
 
 .. class:: StatsCollector
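As a hedged usage sketch of that API: the scrapy.stats import and set_value call are taken from the corestats.py hunk further down; inc_value and get_value are assumed to be part of the same Stats Collector API:

    import os

    from scrapy.stats import stats   # module-level collector instance, as used by CoreStats below

    # set_value is used exactly like this in the corestats.py hunk
    stats.set_value('envinfo/pid', os.getpid())

    # inc_value/get_value are assumed methods of the Stats Collector API
    stats.inc_value('custom/pages_crawled')
    value = stats.get_value('envinfo/pid')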
@@ -226,8 +226,8 @@ DummyStatsCollector
 SimpledbStatsCollector
 ----------------------
 
-.. module:: scrapy.stats.collector.simpledb
-   :synopsis: Simpledb Stats Collector
+.. module:: scrapy.contrib.statscol
+   :synopsis: Additional Stats Collectors
 
 .. class:: SimpledbStatsCollector
 
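Enabling the SimpleDB-backed collector after this move would presumably look like the following (STATS_CLASS, STATS_SDB_DOMAIN and STATS_SDB_ASYNC all appear elsewhere in this diff; the domain name is a placeholder):

    # settings.py -- sketch, assuming boto is installed and AWS credentials are configured
    STATS_CLASS = 'scrapy.contrib.statscol.SimpledbStatsCollector'
    STATS_SDB_DOMAIN = 'my-scrapy-stats'   # placeholder SimpleDB domain name
    STATS_SDB_ASYNC = True                 # both settings are read in the collector's __init__ below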
@@ -288,8 +288,7 @@ Stats signals
 The Stats Collector provides some signals for extending the stats collection
 functionality:
 
-.. module:: scrapy.stats.signals
-   :synopsis: Stats Collector signals
+.. currentmodule:: scrapy.signals
 
 .. signal:: stats_spider_opened
 .. function:: stats_spider_opened(spider)
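A minimal sketch of hooking one of these signals, mirroring the dispatcher.connect calls that CoreStats uses later in this diff (the handler body is illustrative; the spider argument follows the signal signature documented above):

    from scrapy.xlib.pydispatch import dispatcher

    from scrapy import log, signals

    def on_stats_spider_opened(spider):
        # illustrative handler; real extensions would set or increment stats values here
        log.msg("stats collection started for %s" % spider.name)

    dispatcher.connect(on_stats_spider_opened, signal=signals.stats_spider_opened)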
@@ -234,7 +234,7 @@ SQS_VISIBILITY_TIMEOUT = 7200
 SQS_POLLING_DELAY = 30
 SQS_REGION = 'us-east-1'
 
-STATS_CLASS = 'scrapy.stats.collector.MemoryStatsCollector'
+STATS_CLASS = 'scrapy.statscol.MemoryStatsCollector'
 STATS_ENABLED = True
 STATS_DUMP = False
 
@@ -10,7 +10,6 @@ from scrapy.xlib.pydispatch import dispatcher
 
 from scrapy import signals
 from scrapy.stats import stats
-from scrapy.stats.signals import stats_spider_opened, stats_spider_closing
 from scrapy.conf import settings
 
 class CoreStats(object):
@@ -22,8 +21,8 @@ class CoreStats(object):
         stats.set_value('envinfo/logfile', settings['LOG_FILE'])
         stats.set_value('envinfo/pid', os.getpid())
 
-        dispatcher.connect(self.stats_spider_opened, signal=stats_spider_opened)
-        dispatcher.connect(self.stats_spider_closing, signal=stats_spider_closing)
+        dispatcher.connect(self.stats_spider_opened, signal=signals.stats_spider_opened)
+        dispatcher.connect(self.stats_spider_closing, signal=signals.stats_spider_closing)
         dispatcher.connect(self.item_scraped, signal=signals.item_scraped)
         dispatcher.connect(self.item_passed, signal=signals.item_passed)
         dispatcher.connect(self.item_dropped, signal=signals.item_dropped)
@@ -6,11 +6,10 @@ Requires the boto library: http://code.google.com/p/boto/
 
 from datetime import datetime
 
-from boto import connect_sdb
 from twisted.internet import threads
 
 from scrapy.utils.simpledb import to_sdb_value
-from scrapy.stats.collector import StatsCollector
+from scrapy.statscol import StatsCollector
 from scrapy import log
 from scrapy.conf import settings
 
@@ -20,7 +19,9 @@ class SimpledbStatsCollector(StatsCollector):
         super(SimpledbStatsCollector, self).__init__()
         self._sdbdomain = settings['STATS_SDB_DOMAIN']
         self._async = settings.getbool('STATS_SDB_ASYNC')
-        connect_sdb().create_domain(self._sdbdomain)
+        import boto
+        self.connect_sdb = boto.connect_sdb
+        self.connect_sdb().create_domain(self._sdbdomain)
 
     def _persist_stats(self, stats, spider=None):
         if spider is None: # only store spider-specific stats
@@ -40,7 +41,7 @@ class SimpledbStatsCollector(StatsCollector):
         sdb_item = dict((k, self._to_sdb_value(v, k)) for k, v in stats.iteritems())
         sdb_item['spider'] = spider.name
         sdb_item['timestamp'] = self._to_sdb_value(ts)
-        connect_sdb().put_attributes(self._sdbdomain, sdb_item_id, sdb_item)
+        self.connect_sdb().put_attributes(self._sdbdomain, sdb_item_id, sdb_item)
 
     def _get_timestamp(self, spider):
         return datetime.utcnow()
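One apparent upside of binding connect_sdb on the instance, rather than importing it at module level, is that the SimpleDB connection can be swapped out for a stub. A rough sketch under that assumption, with FakeSDBConnection invented purely for illustration:

    import boto

    from scrapy.contrib.statscol import SimpledbStatsCollector

    class FakeSDBConnection(object):
        # stand-in for the object boto.connect_sdb() returns in the code above
        def create_domain(self, name):
            pass
        def put_attributes(self, domain, item_id, item):
            self.last_put = (domain, item_id, item)

    _fake = FakeSDBConnection()
    boto.connect_sdb = lambda: _fake        # patch before __init__ binds self.connect_sdb
    collector = SimpledbStatsCollector()    # create_domain() now goes to the fake, not AWS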
@@ -6,7 +6,8 @@ Use STATSMAILER_RCPTS setting to enable and give the recipient mail address
 
 from scrapy.xlib.pydispatch import dispatcher
 
-from scrapy.stats import stats, signals
+from scrapy.stats import stats
+from scrapy import signals
 from scrapy.mail import MailSender
 from scrapy.conf import settings
 from scrapy.exceptions import NotConfigured
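Per the docstring quoted in the hunk header, this extension stays dormant until STATSMAILER_RCPTS is set. A sketch of enabling it (the address is a placeholder and the list form is an assumption):

    # settings.py
    STATSMAILER_RCPTS = ['stats@example.com']   # recipients for the stats mail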
@@ -17,3 +17,6 @@ response_downloaded = object()
 item_scraped = object()
 item_passed = object()
 item_dropped = object()
+stats_spider_opened = object()
+stats_spider_closing = object()
+stats_spider_closed = object()
@@ -1,4 +1,4 @@
-from scrapy.stats.collector import DummyStatsCollector
+from scrapy.statscol import DummyStatsCollector
 from scrapy.conf import settings
 from scrapy.utils.misc import load_object
 
@@ -1,3 +0,0 @@
-stats_spider_opened = object()
-stats_spider_closing = object()
-stats_spider_closed = object()
@@ -5,7 +5,7 @@ import pprint
 
 from scrapy.xlib.pydispatch import dispatcher
 
-from scrapy.stats.signals import stats_spider_opened, stats_spider_closing, \
+from scrapy.signals import stats_spider_opened, stats_spider_closing, \
     stats_spider_closed
 from scrapy.utils.signal import send_catch_log
 from scrapy import signals
@@ -105,10 +105,9 @@ class CrawlingSession(object):
         # stats because of living signals. This whole test_engine.py should
         # be rewritten from scratch actually.
         from scrapy.utils.signal import disconnect_all
-        from scrapy.stats import signals as stats_signals
-        disconnect_all(stats_signals.stats_spider_opened)
-        disconnect_all(stats_signals.stats_spider_closing)
-        disconnect_all(stats_signals.stats_spider_closed)
+        disconnect_all(signals.stats_spider_opened)
+        disconnect_all(signals.stats_spider_closing)
+        disconnect_all(signals.stats_spider_closed)
 
     def geturl(self, path):
         return "http://localhost:%s%s" % (self.portno, path)
@@ -2,8 +2,8 @@ import unittest
 
 from scrapy.spider import BaseSpider
 from scrapy.xlib.pydispatch import dispatcher
-from scrapy.stats.collector import StatsCollector, DummyStatsCollector
-from scrapy.stats.signals import stats_spider_opened, stats_spider_closing, \
+from scrapy.statscol import StatsCollector, DummyStatsCollector
+from scrapy.signals import stats_spider_opened, stats_spider_closing, \
     stats_spider_closed
 
 class StatsCollectorTest(unittest.TestCase):