1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 10:43:43 +00:00

added scrapy.utils.ref module for tracking references to live instances, for certain objects

This commit is contained in:
Pablo Hoffman 2009-08-18 19:44:25 -03:00
parent 82e4b6adcf
commit 64ffe6b2d3
6 changed files with 53 additions and 4 deletions

View File

@ -11,8 +11,9 @@ from twisted.internet import defer
from scrapy.http.headers import Headers
from scrapy.utils.url import safe_url_string
from scrapy.utils.ref import object_ref
class Request(object):
class Request(object_ref):
__slots__ = ['_encoding', 'method', '_url', '_body', '_meta', '_cache', \
'dont_filter', 'headers', 'cookies', 'deferred', 'priority', \

View File

@ -8,8 +8,9 @@ See documentation in docs/ref/request-response.rst
import copy
from scrapy.http.headers import Headers
from scrapy.utils.ref import object_ref
class Response(object):
class Response(object_ref):
__slots__ = ['url', 'headers', 'status', '_body', 'request', '_meta', \
'flags', '_cache', '__weakref__']

View File

@ -1,4 +1,6 @@
class BaseItem(object):
from scrapy.utils.ref import object_ref
class BaseItem(object_ref):
    """Common ancestor of every scraped item type.

    It derives from ``object_ref`` (imported from ``scrapy.utils.ref``)
    rather than directly from ``object``; it adds no attributes or methods
    of its own.
    """

View File

@ -15,6 +15,7 @@ from scrapy.core.manager import scrapymanager
from scrapy.core.engine import scrapyengine
from scrapy.spider import spiders
from scrapy.stats import stats
from scrapy.utils.ref import print_live_refs
from scrapy.conf import settings
try:
@ -32,6 +33,7 @@ telnet_namespace = {
'spiders': spiders,
'settings': settings,
'p': pprint.pprint,
'prefs': print_live_refs,
'hpy': hpy,
}

42
scrapy/utils/ref.py Normal file
View File

@ -0,0 +1,42 @@
"""This module provides some functions and classes to record and report live
references to object instances, for certain classes"""
import weakref
from collections import defaultdict
from time import time
from operator import itemgetter
from scrapy.conf import settings
# Registry of tracked instances: maps each class to a WeakKeyDictionary of
# {instance: creation timestamp}. Keys are held weakly, so an entry goes away
# once its instance is garbage collected.
live_refs = defaultdict(weakref.WeakKeyDictionary)


class object_ref(object):
    """Base class that records every new instance in ``live_refs``.

    Inherit from this class (instead of ``object``) to keep a record of live
    instances. Each instance is registered under its concrete class together
    with the time at which it was created.
    """

    # No per-instance attributes are added by this base class.
    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are accepted but not used here; they are
        # consumed by the subclass ``__init__``.
        instance = object.__new__(cls)
        per_class = live_refs[cls]
        per_class[instance] = time()
        return instance
# When the TRACK_REFS setting is false, rebind the name ``object_ref`` to the
# plain ``object`` type. Code that imports ``object_ref`` from this module
# afterwards then inherits directly from ``object``, so the tracking
# ``__new__`` defined above is never run for those classes.
if not settings.getbool('TRACK_REFS'):
    object_ref = object
def print_live_refs():
    """Print a summary of tracked live instances to standard output.

    After a "Live References" header and a blank line, one line is printed
    for every tracked class that currently has live instances: the class
    name, the number of live instances, and the age in seconds of the oldest
    one. Classes with no live instances are omitted.
    """
    print("Live References")
    print("")
    now = time()
    # Iterate over snapshots: weak dictionaries can lose entries at any time
    # as a side effect of garbage collection, which would break lazy
    # iteration or leave min() with an empty sequence after the emptiness
    # check had already passed.
    for cls, wdict in list(live_refs.items()):
        timestamps = list(wdict.values())
        if not timestamps:
            continue
        oldest = min(timestamps)
        print("%-30s %6d oldest: %ds ago" % (cls.__name__, len(timestamps),
                                              now - oldest))
def get_oldest(class_name):
    """Return the oldest live tracked instance of the class named class_name.

    Tracked classes are matched by their ``__name__``. The instance with the
    smallest recorded creation timestamp is returned. Returns None when no
    tracked class with that name has any live instance.
    """
    # Work on snapshots: entries of a weak dictionary can disappear during
    # iteration as a side effect of garbage collection.
    for cls, wdict in list(live_refs.items()):
        if cls.__name__ != class_name:
            continue
        entries = list(wdict.items())
        if entries:
            # Each entry is (instance, timestamp); pick the smallest timestamp.
            return min(entries, key=itemgetter(1))[0]
    return None

View File

@ -11,9 +11,10 @@ from scrapy.xpath.factories import xmlDoc_from_html, xmlDoc_from_xml
from scrapy.xpath.document import Libxml2Document
from scrapy.utils.python import flatten, unicode_to_str
from scrapy.utils.misc import extract_regex
from scrapy.utils.ref import object_ref
from scrapy.utils.decorator import deprecated
class XPathSelector(object):
class XPathSelector(object_ref):
def __init__(self, response=None, text=None, node=None, parent=None, expr=None):
if parent: