From 053d45e79f394dc3dcd356505b7ed43fd0e82c0a Mon Sep 17 00:00:00 2001 From: Pablo Hoffman Date: Sun, 22 Aug 2010 01:24:07 -0300 Subject: [PATCH] Split stats collector classes from stats collection facility (#204) * moved scrapy.stats.collector.__init__ module to scrapy.statscol * moved scrapy.stats.collector.simpledb module to scrapy.contrib.statscol * moved signals from scrapy.stats.signals to scrapy.signals * moved scrapy/stats/__init__.py to scrapy/stats.py * updated documentation and tests accordingly --HG-- rename : scrapy/stats/collector/simpledb.py => scrapy/contrib/statscol.py rename : scrapy/stats/__init__.py => scrapy/stats.py rename : scrapy/stats/collector/__init__.py => scrapy/statscol.py --- docs/topics/settings.rst | 2 +- docs/topics/stats.rst | 13 ++++++------- scrapy/conf/default_settings.py | 2 +- scrapy/contrib/corestats.py | 5 ++--- .../collector/simpledb.py => contrib/statscol.py} | 9 +++++---- scrapy/contrib/statsmailer.py | 3 ++- scrapy/signals.py | 3 +++ scrapy/{stats/__init__.py => stats.py} | 2 +- scrapy/stats/signals.py | 3 --- scrapy/{stats/collector/__init__.py => statscol.py} | 2 +- scrapy/tests/test_engine.py | 7 +++---- scrapy/tests/test_stats.py | 4 ++-- 12 files changed, 27 insertions(+), 28 deletions(-) rename scrapy/{stats/collector/simpledb.py => contrib/statscol.py} (87%) rename scrapy/{stats/__init__.py => stats.py} (83%) delete mode 100644 scrapy/stats/signals.py rename scrapy/{stats/collector/__init__.py => statscol.py} (97%) diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index 7f5aba5c7..8bbbd967c 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -934,7 +934,7 @@ Example:: STATS_CLASS ----------- -Default: ``'scrapy.stats.collector.MemoryStatsCollector'`` +Default: ``'scrapy.statscol.MemoryStatsCollector'`` The class to use for collecting stats (must implement the Stats Collector API, or subclass the StatsCollector class). 
diff --git a/docs/topics/stats.rst b/docs/topics/stats.rst index 81021f230..072164e5b 100644 --- a/docs/topics/stats.rst +++ b/docs/topics/stats.rst @@ -101,11 +101,11 @@ Stats Collector API =================== There are several Stats Collectors available under the -:mod:`scrapy.stats.collector` module and they all implement the Stats -Collector API defined by the :class:`~scrapy.stats.collector.StatsCollector` +:mod:`scrapy.statscol` module and they all implement the Stats +Collector API defined by the :class:`~scrapy.statscol.StatsCollector` class (which they all inherit from). -.. module:: scrapy.stats.collector +.. module:: scrapy.statscol :synopsis: Basic Stats Collectors .. class:: StatsCollector @@ -226,8 +226,8 @@ DummyStatsCollector SimpledbStatsCollector ---------------------- -.. module:: scrapy.stats.collector.simpledb - :synopsis: Simpledb Stats Collector +.. module:: scrapy.contrib.statscol + :synopsis: Additional Stats Collectors .. class:: SimpledbStatsCollector @@ -288,8 +288,7 @@ Stats signals The Stats Collector provides some signals for extending the stats collection functionality: -.. module:: scrapy.stats.signals - :synopsis: Stats Collector signals +.. currentmodule:: scrapy.signals .. signal:: stats_spider_opened .. 
function:: stats_spider_opened(spider) diff --git a/scrapy/conf/default_settings.py b/scrapy/conf/default_settings.py index b0b3cd1f0..ba3db6014 100644 --- a/scrapy/conf/default_settings.py +++ b/scrapy/conf/default_settings.py @@ -234,7 +234,7 @@ SQS_VISIBILITY_TIMEOUT = 7200 SQS_POLLING_DELAY = 30 SQS_REGION = 'us-east-1' -STATS_CLASS = 'scrapy.stats.collector.MemoryStatsCollector' +STATS_CLASS = 'scrapy.statscol.MemoryStatsCollector' STATS_ENABLED = True STATS_DUMP = False diff --git a/scrapy/contrib/corestats.py b/scrapy/contrib/corestats.py index d3d980401..bafadd65a 100644 --- a/scrapy/contrib/corestats.py +++ b/scrapy/contrib/corestats.py @@ -10,7 +10,6 @@ from scrapy.xlib.pydispatch import dispatcher from scrapy import signals from scrapy.stats import stats -from scrapy.stats.signals import stats_spider_opened, stats_spider_closing from scrapy.conf import settings class CoreStats(object): @@ -22,8 +21,8 @@ class CoreStats(object): stats.set_value('envinfo/logfile', settings['LOG_FILE']) stats.set_value('envinfo/pid', os.getpid()) - dispatcher.connect(self.stats_spider_opened, signal=stats_spider_opened) - dispatcher.connect(self.stats_spider_closing, signal=stats_spider_closing) + dispatcher.connect(self.stats_spider_opened, signal=signals.stats_spider_opened) + dispatcher.connect(self.stats_spider_closing, signal=signals.stats_spider_closing) dispatcher.connect(self.item_scraped, signal=signals.item_scraped) dispatcher.connect(self.item_passed, signal=signals.item_passed) dispatcher.connect(self.item_dropped, signal=signals.item_dropped) diff --git a/scrapy/stats/collector/simpledb.py b/scrapy/contrib/statscol.py similarity index 87% rename from scrapy/stats/collector/simpledb.py rename to scrapy/contrib/statscol.py index d521c5e40..d44052c1b 100644 --- a/scrapy/stats/collector/simpledb.py +++ b/scrapy/contrib/statscol.py @@ -6,11 +6,10 @@ Requires the boto library: http://code.google.com/p/boto/ from datetime import datetime -from boto import connect_sdb 
from twisted.internet import threads from scrapy.utils.simpledb import to_sdb_value -from scrapy.stats.collector import StatsCollector +from scrapy.statscol import StatsCollector from scrapy import log from scrapy.conf import settings @@ -20,7 +19,9 @@ class SimpledbStatsCollector(StatsCollector): super(SimpledbStatsCollector, self).__init__() self._sdbdomain = settings['STATS_SDB_DOMAIN'] self._async = settings.getbool('STATS_SDB_ASYNC') - connect_sdb().create_domain(self._sdbdomain) + import boto + self.connect_sdb = boto.connect_sdb + self.connect_sdb().create_domain(self._sdbdomain) def _persist_stats(self, stats, spider=None): if spider is None: # only store spider-specific stats @@ -40,7 +41,7 @@ class SimpledbStatsCollector(StatsCollector): sdb_item = dict((k, self._to_sdb_value(v, k)) for k, v in stats.iteritems()) sdb_item['spider'] = spider.name sdb_item['timestamp'] = self._to_sdb_value(ts) - connect_sdb().put_attributes(self._sdbdomain, sdb_item_id, sdb_item) + self.connect_sdb().put_attributes(self._sdbdomain, sdb_item_id, sdb_item) def _get_timestamp(self, spider): return datetime.utcnow() diff --git a/scrapy/contrib/statsmailer.py b/scrapy/contrib/statsmailer.py index 2398ad287..21cce76fe 100644 --- a/scrapy/contrib/statsmailer.py +++ b/scrapy/contrib/statsmailer.py @@ -6,7 +6,8 @@ Use STATSMAILER_RCPTS setting to enable and give the recipient mail address from scrapy.xlib.pydispatch import dispatcher -from scrapy.stats import stats, signals +from scrapy.stats import stats +from scrapy import signals from scrapy.mail import MailSender from scrapy.conf import settings from scrapy.exceptions import NotConfigured diff --git a/scrapy/signals.py b/scrapy/signals.py index 0b239c20d..88665404e 100644 --- a/scrapy/signals.py +++ b/scrapy/signals.py @@ -17,3 +17,6 @@ response_downloaded = object() item_scraped = object() item_passed = object() item_dropped = object() +stats_spider_opened = object() +stats_spider_closing = object() +stats_spider_closed = 
object() diff --git a/scrapy/stats/__init__.py b/scrapy/stats.py similarity index 83% rename from scrapy/stats/__init__.py rename to scrapy/stats.py index 65761fc0d..37d9c6888 100644 --- a/scrapy/stats/__init__.py +++ b/scrapy/stats.py @@ -1,4 +1,4 @@ -from scrapy.stats.collector import DummyStatsCollector +from scrapy.statscol import DummyStatsCollector from scrapy.conf import settings from scrapy.utils.misc import load_object diff --git a/scrapy/stats/signals.py b/scrapy/stats/signals.py deleted file mode 100644 index 19d3eb295..000000000 --- a/scrapy/stats/signals.py +++ /dev/null @@ -1,3 +0,0 @@ -stats_spider_opened = object() -stats_spider_closing = object() -stats_spider_closed = object() diff --git a/scrapy/stats/collector/__init__.py b/scrapy/statscol.py similarity index 97% rename from scrapy/stats/collector/__init__.py rename to scrapy/statscol.py index 85b4f4e51..c69510dd0 100644 --- a/scrapy/stats/collector/__init__.py +++ b/scrapy/statscol.py @@ -5,7 +5,7 @@ import pprint from scrapy.xlib.pydispatch import dispatcher -from scrapy.stats.signals import stats_spider_opened, stats_spider_closing, \ +from scrapy.signals import stats_spider_opened, stats_spider_closing, \ stats_spider_closed from scrapy.utils.signal import send_catch_log from scrapy import signals diff --git a/scrapy/tests/test_engine.py b/scrapy/tests/test_engine.py index e73c13bbf..69c3127fe 100644 --- a/scrapy/tests/test_engine.py +++ b/scrapy/tests/test_engine.py @@ -105,10 +105,9 @@ class CrawlingSession(object): # stats because of living signals. This whole test_engine.py should # be rewritten from scratch actually. 
from scrapy.utils.signal import disconnect_all - from scrapy.stats import signals as stats_signals - disconnect_all(stats_signals.stats_spider_opened) - disconnect_all(stats_signals.stats_spider_closing) - disconnect_all(stats_signals.stats_spider_closed) + disconnect_all(signals.stats_spider_opened) + disconnect_all(signals.stats_spider_closing) + disconnect_all(signals.stats_spider_closed) def geturl(self, path): return "http://localhost:%s%s" % (self.portno, path) diff --git a/scrapy/tests/test_stats.py b/scrapy/tests/test_stats.py index 786021898..a39c0bd0e 100644 --- a/scrapy/tests/test_stats.py +++ b/scrapy/tests/test_stats.py @@ -2,8 +2,8 @@ import unittest from scrapy.spider import BaseSpider from scrapy.xlib.pydispatch import dispatcher -from scrapy.stats.collector import StatsCollector, DummyStatsCollector -from scrapy.stats.signals import stats_spider_opened, stats_spider_closing, \ +from scrapy.statscol import StatsCollector, DummyStatsCollector +from scrapy.signals import stats_spider_opened, stats_spider_closing, \ stats_spider_closed class StatsCollectorTest(unittest.TestCase):