mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 10:43:43 +00:00
Added the scrapy.utils.ref module for tracking references to live instances of certain classes
This commit is contained in:
parent
82e4b6adcf
commit
64ffe6b2d3
@ -11,8 +11,9 @@ from twisted.internet import defer
|
||||
|
||||
from scrapy.http.headers import Headers
|
||||
from scrapy.utils.url import safe_url_string
|
||||
from scrapy.utils.ref import object_ref
|
||||
|
||||
class Request(object):
|
||||
class Request(object_ref):
|
||||
|
||||
__slots__ = ['_encoding', 'method', '_url', '_body', '_meta', '_cache', \
|
||||
'dont_filter', 'headers', 'cookies', 'deferred', 'priority', \
|
||||
|
@ -8,8 +8,9 @@ See documentation in docs/ref/request-response.rst
|
||||
import copy
|
||||
|
||||
from scrapy.http.headers import Headers
|
||||
from scrapy.utils.ref import object_ref
|
||||
|
||||
class Response(object):
|
||||
class Response(object_ref):
|
||||
|
||||
__slots__ = ['url', 'headers', 'status', '_body', 'request', '_meta', \
|
||||
'flags', '_cache', '__weakref__']
|
||||
|
@ -1,4 +1,6 @@
|
||||
class BaseItem(object):
|
||||
from scrapy.utils.ref import object_ref
|
||||
|
||||
class BaseItem(object_ref):
    """Base class for all scraped items.

    Derives from ``object_ref`` (imported from ``scrapy.utils.ref``) instead
    of ``object`` so that live item instances can be recorded by that module.
    NOTE(review): whether anything is actually recorded presumably depends on
    how ``scrapy.utils.ref`` is configured -- confirm there.
    """
    pass
|
||||
|
||||
|
@ -15,6 +15,7 @@ from scrapy.core.manager import scrapymanager
|
||||
from scrapy.core.engine import scrapyengine
|
||||
from scrapy.spider import spiders
|
||||
from scrapy.stats import stats
|
||||
from scrapy.utils.ref import print_live_refs
|
||||
from scrapy.conf import settings
|
||||
|
||||
try:
|
||||
@ -32,6 +33,7 @@ telnet_namespace = {
|
||||
'spiders': spiders,
|
||||
'settings': settings,
|
||||
'p': pprint.pprint,
|
||||
'prefs': print_live_refs,
|
||||
'hpy': hpy,
|
||||
}
|
||||
|
||||
|
42
scrapy/utils/ref.py
Normal file
42
scrapy/utils/ref.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""This module provides some functions and classes to record and report live
|
||||
references to object instances, for certain classes"""
|
||||
|
||||
import weakref
|
||||
from collections import defaultdict
|
||||
from time import time
|
||||
from operator import itemgetter
|
||||
|
||||
from scrapy.conf import settings
|
||||
|
||||
# Maps each tracked class to a weak-keyed dictionary of
# {instance: creation timestamp}. Keys are held weakly, so being listed
# here does not keep an instance alive.
live_refs = defaultdict(weakref.WeakKeyDictionary)


class object_ref(object):
    """Base class that records every live instance of its subclasses.

    Inherit from this class (instead of ``object``) to keep a record of live
    instances in ``live_refs``, grouped by concrete class and stamped with
    the time at which each instance was created.
    """

    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are accepted but not used here: only the
        # bare allocation is performed and then registered.
        instance = object.__new__(cls)
        created_at = time()
        live_refs[cls][instance] = created_at
        return instance
|
||||
|
||||
# Reference tracking is opt-in. Unless the TRACK_REFS setting is true, the
# name object_ref is rebound to the plain built-in object, so classes that
# are defined afterwards with object_ref as their base never go through the
# instance-recording __new__ defined above.
if not settings.getbool('TRACK_REFS'):
    object_ref = object
|
||||
|
||||
def print_live_refs():
    """Print a summary of the tracked classes that have live instances.

    For every class recorded in ``live_refs`` with at least one live
    instance, one line is printed with the class name, the number of live
    instances and the age (in seconds) of the oldest one. Classes without
    live instances are skipped. Nothing is returned.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Live References")
    print("")
    now = time()
    for cls, wdict in live_refs.items():
        # Take one snapshot of the timestamps. Entries of a weak dictionary
        # can disappear whenever the garbage collector runs, so testing it
        # for emptiness and then iterating it lazily is not safe.
        timestamps = list(wdict.values())
        if not timestamps:
            continue
        oldest = min(timestamps)
        print("%-30s %6d oldest: %ds ago" % (cls.__name__, len(timestamps),
                                             now - oldest))
|
||||
|
||||
def get_oldest(class_name):
    """Return the oldest live instance of the tracked class ``class_name``.

    ``class_name`` is compared against ``cls.__name__`` of every class
    recorded in ``live_refs``. The instance with the smallest recorded
    creation time is returned; ``None`` is returned when no tracked class
    with that name currently has a live instance.
    """
    for cls, wdict in live_refs.items():
        if cls.__name__ != class_name:
            continue
        # Snapshot the (instance, timestamp) pairs before inspecting them:
        # weak entries may be dropped by the garbage collector at any time,
        # which would break a lazy iteration or leave min() with no items.
        entries = list(wdict.items())
        if entries:
            return min(entries, key=itemgetter(1))[0]
    return None
|
@ -11,9 +11,10 @@ from scrapy.xpath.factories import xmlDoc_from_html, xmlDoc_from_xml
|
||||
from scrapy.xpath.document import Libxml2Document
|
||||
from scrapy.utils.python import flatten, unicode_to_str
|
||||
from scrapy.utils.misc import extract_regex
|
||||
from scrapy.utils.ref import object_ref
|
||||
from scrapy.utils.decorator import deprecated
|
||||
|
||||
class XPathSelector(object):
|
||||
class XPathSelector(object_ref):
|
||||
|
||||
def __init__(self, response=None, text=None, node=None, parent=None, expr=None):
|
||||
if parent:
|
||||
|
Loading…
x
Reference in New Issue
Block a user