From 64ffe6b2d3f839023d48571b2ce8f734115eac5a Mon Sep 17 00:00:00 2001 From: Pablo Hoffman Date: Tue, 18 Aug 2009 19:44:25 -0300 Subject: [PATCH] added scrapy.utils.ref module for tracking references to live instances, for certain objects --- scrapy/http/request/__init__.py | 3 ++- scrapy/http/response/__init__.py | 3 ++- scrapy/item.py | 4 ++- scrapy/management/telnet.py | 2 ++ scrapy/utils/ref.py | 42 ++++++++++++++++++++++++++++++++ scrapy/xpath/selector.py | 3 ++- 6 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 scrapy/utils/ref.py diff --git a/scrapy/http/request/__init__.py b/scrapy/http/request/__init__.py index ddde58ffc..201ceddac 100644 --- a/scrapy/http/request/__init__.py +++ b/scrapy/http/request/__init__.py @@ -11,8 +11,9 @@ from twisted.internet import defer from scrapy.http.headers import Headers from scrapy.utils.url import safe_url_string +from scrapy.utils.ref import object_ref -class Request(object): +class Request(object_ref): __slots__ = ['_encoding', 'method', '_url', '_body', '_meta', '_cache', \ 'dont_filter', 'headers', 'cookies', 'deferred', 'priority', \ diff --git a/scrapy/http/response/__init__.py b/scrapy/http/response/__init__.py index aae1fec18..6eb78625b 100644 --- a/scrapy/http/response/__init__.py +++ b/scrapy/http/response/__init__.py @@ -8,8 +8,9 @@ See documentation in docs/ref/request-response.rst import copy from scrapy.http.headers import Headers +from scrapy.utils.ref import object_ref -class Response(object): +class Response(object_ref): __slots__ = ['url', 'headers', 'status', '_body', 'request', '_meta', \ 'flags', '_cache', '__weakref__'] diff --git a/scrapy/item.py b/scrapy/item.py index 39cc0e7d7..ca56f27ea 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -1,4 +1,6 @@ -class BaseItem(object): +from scrapy.utils.ref import object_ref + +class BaseItem(object_ref): """Base class for all scraped items.""" pass diff --git a/scrapy/management/telnet.py b/scrapy/management/telnet.py index 
36b83041b..48f651482 100644 --- a/scrapy/management/telnet.py +++ b/scrapy/management/telnet.py @@ -15,6 +15,7 @@ from scrapy.core.manager import scrapymanager from scrapy.core.engine import scrapyengine from scrapy.spider import spiders from scrapy.stats import stats +from scrapy.utils.ref import print_live_refs from scrapy.conf import settings try: @@ -32,6 +33,7 @@ telnet_namespace = { 'spiders': spiders, 'settings': settings, 'p': pprint.pprint, + 'prefs': print_live_refs, 'hpy': hpy, } diff --git a/scrapy/utils/ref.py b/scrapy/utils/ref.py new file mode 100644 index 000000000..32b35b1d8 --- /dev/null +++ b/scrapy/utils/ref.py @@ -0,0 +1,42 @@ +"""This module provides some functions and classes to record and report live +references to object instances, for certain classes""" + +import weakref +from collections import defaultdict +from time import time +from operator import itemgetter + +from scrapy.conf import settings + +live_refs = defaultdict(weakref.WeakKeyDictionary) + +class object_ref(object): + """Inherit from this class (instead of object) to keep a record of live + instances""" + + __slots__ = () + + def __new__(cls, *args, **kwargs): + obj = object.__new__(cls) + live_refs[cls][obj] = time() + return obj + +if not settings.getbool('TRACK_REFS'): + object_ref = object + +def print_live_refs(): + print "Live References" + print + now = time() + for cls, wdict in live_refs.iteritems(): + if not wdict: + continue + oldest = min(wdict.itervalues()) + print "%-30s %6d oldest: %ds ago" % (cls.__name__, len(wdict), \ + now-oldest) + +def get_oldest(class_name): + for cls, wdict in live_refs.iteritems(): + if cls.__name__ == class_name: + if wdict: + return min(wdict.iteritems(), key=itemgetter(1))[0] diff --git a/scrapy/xpath/selector.py b/scrapy/xpath/selector.py index e89f75c23..67c272e35 100644 --- a/scrapy/xpath/selector.py +++ b/scrapy/xpath/selector.py @@ -11,9 +11,10 @@ from scrapy.xpath.factories import xmlDoc_from_html, xmlDoc_from_xml from 
scrapy.xpath.document import Libxml2Document from scrapy.utils.python import flatten, unicode_to_str from scrapy.utils.misc import extract_regex +from scrapy.utils.ref import object_ref from scrapy.utils.decorator import deprecated -class XPathSelector(object): +class XPathSelector(object_ref): def __init__(self, response=None, text=None, node=None, parent=None, expr=None): if parent: