
Split stats collector classes from stats collection facility (#204)

* moved scrapy.stats.collector.__init__ module to scrapy.statscol
* moved scrapy.stats.collector.simpledb module to scrapy.contrib.statscol
* moved signals from scrapy.stats.signals to scrapy.signals
* moved scrapy/stats/__init__.py to scrapy/stats.py
* updated documentation and tests accordingly

--HG--
rename : scrapy/stats/collector/simpledb.py => scrapy/contrib/statscol.py
rename : scrapy/stats/__init__.py => scrapy/stats.py
rename : scrapy/stats/collector/__init__.py => scrapy/statscol.py
Pablo Hoffman 2010-08-22 01:24:07 -03:00
parent c276c48c91
commit 053d45e79f
12 changed files with 27 additions and 28 deletions


@@ -934,7 +934,7 @@ Example::
 STATS_CLASS
 -----------
 
-Default: ``'scrapy.stats.collector.MemoryStatsCollector'``
+Default: ``'scrapy.statscol.MemoryStatsCollector'``
 
 The class to use for collecting stats (must implement the Stats Collector API,
 or subclass the StatsCollector class).
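
(As a usage sketch: a project overriding this setting would now point at the new module path; the project module and custom class below are hypothetical.)

    # settings.py of a hypothetical project
    STATS_CLASS = 'scrapy.statscol.MemoryStatsCollector'   # new default location
    # or a custom subclass (illustrative path):
    # STATS_CLASS = 'myproject.stats.MyStatsCollector'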


@@ -101,11 +101,11 @@ Stats Collector API
 ===================
 
 There are several Stats Collectors available under the
-:mod:`scrapy.stats.collector` module and they all implement the Stats
-Collector API defined by the :class:`~scrapy.stats.collector.StatsCollector`
+:mod:`scrapy.statscol` module and they all implement the Stats
+Collector API defined by the :class:`~scrapy.statscol.StatsCollector`
 class (which they all inherit from).
 
-.. module:: scrapy.stats.collector
+.. module:: scrapy.statscol
    :synopsis: Basic Stats Collectors
 
 .. class:: StatsCollector
@@ -226,8 +226,8 @@ DummyStatsCollector
 SimpledbStatsCollector
 ----------------------
 
-.. module:: scrapy.stats.collector.simpledb
-   :synopsis: Simpledb Stats Collector
+.. module:: scrapy.contrib.statscol
+   :synopsis: Additional Stats Collectors
 
 .. class:: SimpledbStatsCollector
@@ -288,8 +288,7 @@ Stats signals
 The Stats Collector provides some signals for extending the stats collection
 functionality:
 
-.. module:: scrapy.stats.signals
-   :synopsis: Stats Collector signals
+.. currentmodule:: scrapy.signals
 
 .. signal:: stats_spider_opened
 .. function:: stats_spider_opened(spider)
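
(A minimal sketch of subscribing to these signals after the move, mirroring the dispatcher.connect() calls used elsewhere in this commit; the handler body is illustrative.)

    from scrapy.xlib.pydispatch import dispatcher
    from scrapy import signals

    def on_stats_spider_opened(spider):
        # runs when the stats collector starts tracking a spider
        print "stats opened for %s" % spider.name

    dispatcher.connect(on_stats_spider_opened, signal=signals.stats_spider_opened)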


@@ -234,7 +234,7 @@ SQS_VISIBILITY_TIMEOUT = 7200
 SQS_POLLING_DELAY = 30
 SQS_REGION = 'us-east-1'
 
-STATS_CLASS = 'scrapy.stats.collector.MemoryStatsCollector'
+STATS_CLASS = 'scrapy.statscol.MemoryStatsCollector'
 STATS_ENABLED = True
 STATS_DUMP = False


@@ -10,7 +10,6 @@ from scrapy.xlib.pydispatch import dispatcher
 from scrapy import signals
 from scrapy.stats import stats
-from scrapy.stats.signals import stats_spider_opened, stats_spider_closing
 from scrapy.conf import settings
 
 class CoreStats(object):
@@ -22,8 +21,8 @@ class CoreStats(object):
         stats.set_value('envinfo/logfile', settings['LOG_FILE'])
         stats.set_value('envinfo/pid', os.getpid())
-        dispatcher.connect(self.stats_spider_opened, signal=stats_spider_opened)
-        dispatcher.connect(self.stats_spider_closing, signal=stats_spider_closing)
+        dispatcher.connect(self.stats_spider_opened, signal=signals.stats_spider_opened)
+        dispatcher.connect(self.stats_spider_closing, signal=signals.stats_spider_closing)
         dispatcher.connect(self.item_scraped, signal=signals.item_scraped)
         dispatcher.connect(self.item_passed, signal=signals.item_passed)
         dispatcher.connect(self.item_dropped, signal=signals.item_dropped)


@@ -6,11 +6,10 @@ Requires the boto library: http://code.google.com/p/boto/
 from datetime import datetime
 
-from boto import connect_sdb
 from twisted.internet import threads
 
 from scrapy.utils.simpledb import to_sdb_value
-from scrapy.stats.collector import StatsCollector
+from scrapy.statscol import StatsCollector
 from scrapy import log
 from scrapy.conf import settings
 
@@ -20,7 +19,9 @@ class SimpledbStatsCollector(StatsCollector):
         super(SimpledbStatsCollector, self).__init__()
         self._sdbdomain = settings['STATS_SDB_DOMAIN']
         self._async = settings.getbool('STATS_SDB_ASYNC')
-        connect_sdb().create_domain(self._sdbdomain)
+        import boto
+        self.connect_sdb = boto.connect_sdb
+        self.connect_sdb().create_domain(self._sdbdomain)
 
     def _persist_stats(self, stats, spider=None):
         if spider is None: # only store spider-specific stats
@@ -40,7 +41,7 @@ class SimpledbStatsCollector(StatsCollector):
         sdb_item = dict((k, self._to_sdb_value(v, k)) for k, v in stats.iteritems())
         sdb_item['spider'] = spider.name
         sdb_item['timestamp'] = self._to_sdb_value(ts)
-        connect_sdb().put_attributes(self._sdbdomain, sdb_item_id, sdb_item)
+        self.connect_sdb().put_attributes(self._sdbdomain, sdb_item_id, sdb_item)
 
     def _get_timestamp(self, spider):
         return datetime.utcnow()
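
(Design note: binding boto's connect_sdb to the instance, instead of importing it at module level, keeps boto an optional dependency and makes the connection factory swappable. A hedged sketch of exploiting that in a test; the FakeSDB class is made up for illustration, and it assumes STATS_SDB_DOMAIN is set in the project settings.)

    import boto
    from scrapy.contrib.statscol import SimpledbStatsCollector

    class FakeSDB(object):
        """In-memory stand-in for boto's SimpleDB connection."""
        def create_domain(self, domain):
            pass
        def put_attributes(self, domain, item_id, attrs):
            self.written = (domain, item_id, attrs)

    _orig = boto.connect_sdb
    boto.connect_sdb = FakeSDB       # the constructor picks up this factory
    try:
        collector = SimpledbStatsCollector()  # no real AWS call is made
    finally:
        boto.connect_sdb = _orig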


@@ -6,7 +6,8 @@ Use STATSMAILER_RCPTS setting to enable and give the recipient mail address
 
 from scrapy.xlib.pydispatch import dispatcher
-from scrapy.stats import stats, signals
+from scrapy.stats import stats
+from scrapy import signals
 from scrapy.mail import MailSender
 from scrapy.conf import settings
 from scrapy.exceptions import NotConfigured


@@ -17,3 +17,6 @@ response_downloaded = object()
 item_scraped = object()
 item_passed = object()
 item_dropped = object()
+stats_spider_opened = object()
+stats_spider_closing = object()
+stats_spider_closed = object()


@@ -1,4 +1,4 @@
-from scrapy.stats.collector import DummyStatsCollector
+from scrapy.statscol import DummyStatsCollector
 from scrapy.conf import settings
 from scrapy.utils.misc import load_object
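
(For context, these imports are typically combined to build the global stats object from the settings; a hedged sketch of that wiring, not the file's exact body.)

    if settings.getbool('STATS_ENABLED'):
        stats = load_object(settings['STATS_CLASS'])()
    else:
        stats = DummyStatsCollector()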


@@ -1,3 +0,0 @@
-stats_spider_opened = object()
-stats_spider_closing = object()
-stats_spider_closed = object()


@@ -5,7 +5,7 @@ import pprint
 
 from scrapy.xlib.pydispatch import dispatcher
-from scrapy.stats.signals import stats_spider_opened, stats_spider_closing, \
+from scrapy.signals import stats_spider_opened, stats_spider_closing, \
     stats_spider_closed
 from scrapy.utils.signal import send_catch_log
 from scrapy import signals


@@ -105,10 +105,9 @@ class CrawlingSession(object):
         # stats because of living signals. This whole test_engine.py should
         # be rewritten from scratch actually.
         from scrapy.utils.signal import disconnect_all
-        from scrapy.stats import signals as stats_signals
-        disconnect_all(stats_signals.stats_spider_opened)
-        disconnect_all(stats_signals.stats_spider_closing)
-        disconnect_all(stats_signals.stats_spider_closed)
+        disconnect_all(signals.stats_spider_opened)
+        disconnect_all(signals.stats_spider_closing)
+        disconnect_all(signals.stats_spider_closed)
 
     def geturl(self, path):
         return "http://localhost:%s%s" % (self.portno, path)


@@ -2,8 +2,8 @@ import unittest
 
 from scrapy.spider import BaseSpider
 from scrapy.xlib.pydispatch import dispatcher
-from scrapy.stats.collector import StatsCollector, DummyStatsCollector
-from scrapy.stats.signals import stats_spider_opened, stats_spider_closing, \
+from scrapy.statscol import StatsCollector, DummyStatsCollector
+from scrapy.signals import stats_spider_opened, stats_spider_closing, \
     stats_spider_closed
 
 class StatsCollectorTest(unittest.TestCase):