1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 14:43:46 +00:00

added trackref stats to memory debugger report. closes #272

This commit is contained in:
Pablo Hoffman 2010-10-27 21:18:58 -02:00
parent 1d5c56089c
commit 7f646541c3
2 changed files with 21 additions and 12 deletions

View File

@ -4,6 +4,7 @@ MemoryDebugger extension
See documentation in docs/topics/extensions.rst See documentation in docs/topics/extensions.rst
""" """
import os
import gc import gc
import socket import socket
@ -12,6 +13,7 @@ from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals from scrapy import signals
from scrapy.exceptions import NotConfigured from scrapy.exceptions import NotConfigured
from scrapy.mail import MailSender from scrapy.mail import MailSender
from scrapy.utils.trackref import format_live_refs
from scrapy.conf import settings from scrapy.conf import settings
from scrapy import log from scrapy import log
@ -22,7 +24,7 @@ class MemoryDebugger(object):
import libxml2 import libxml2
self.libxml2 = libxml2 self.libxml2 = libxml2
except ImportError: except ImportError:
raise NotConfigured self.libxml2 = None
if not settings.getbool('MEMDEBUG_ENABLED'): if not settings.getbool('MEMDEBUG_ENABLED'):
raise NotConfigured raise NotConfigured
@ -33,7 +35,8 @@ class MemoryDebugger(object):
dispatcher.connect(self.engine_stopped, signals.engine_stopped) dispatcher.connect(self.engine_stopped, signals.engine_stopped)
def engine_started(self): def engine_started(self):
self.libxml2.debugMemory(1) if self.libxml2:
self.libxml2.debugMemory(1)
def engine_stopped(self): def engine_stopped(self):
figures = self.collect_figures() figures = self.collect_figures()
@ -41,12 +44,13 @@ class MemoryDebugger(object):
self.log_or_send_report(report) self.log_or_send_report(report)
def collect_figures(self): def collect_figures(self):
self.libxml2.cleanupParser()
gc.collect() gc.collect()
figures = [] figures = []
figures.append(("Objects in gc.garbage", len(gc.garbage), "")) figures.append(("Objects in gc.garbage", len(gc.garbage), ""))
figures.append(("libxml2 memory leak", self.libxml2.debugMemory(1), "bytes")) if self.libxml2:
self.libxml2.cleanupParser()
figures.append(("libxml2 memory leak", self.libxml2.debugMemory(1), "bytes"))
return figures return figures
def create_report(self, figures): def create_report(self, figures):
@ -54,6 +58,9 @@ class MemoryDebugger(object):
s += "SCRAPY MEMORY DEBUGGER RESULTS\n\n" s += "SCRAPY MEMORY DEBUGGER RESULTS\n\n"
for f in figures: for f in figures:
s += "%-30s : %d %s\n" % f s += "%-30s : %d %s\n" % f
if settings.getbool('TRACK_REFS'):
s += os.linesep
s += format_live_refs()
return s return s
def log_or_send_report(self, report): def log_or_send_report(self, report):

View File

@ -10,7 +10,7 @@ and no performance penalty at all when disabled (as object_ref becomes just an
alias to object in that case). alias to object in that case).
""" """
import weakref import weakref, os
from collections import defaultdict from collections import defaultdict
from time import time from time import time
from operator import itemgetter from operator import itemgetter
@ -34,12 +34,10 @@ class object_ref(object):
if not settings.getbool('TRACK_REFS'): if not settings.getbool('TRACK_REFS'):
object_ref = object object_ref = object
def print_live_refs(ignore=NoneType): def format_live_refs(ignore=NoneType):
if object_ref is object: if object_ref is object:
print "The trackref module is disabled. Use TRACK_REFS setting to enable it." return "The trackref module is disabled. Use TRACK_REFS setting to enable it."
return s = "Live References" + os.linesep + os.linesep
print "Live References"
print
now = time() now = time()
for cls, wdict in live_refs.iteritems(): for cls, wdict in live_refs.iteritems():
if not wdict: if not wdict:
@ -47,8 +45,12 @@ def print_live_refs(ignore=NoneType):
if issubclass(cls, ignore): if issubclass(cls, ignore):
continue continue
oldest = min(wdict.itervalues()) oldest = min(wdict.itervalues())
print "%-30s %6d oldest: %ds ago" % (cls.__name__, len(wdict), \ s += "%-30s %6d oldest: %ds ago" % (cls.__name__, len(wdict), \
now-oldest) now-oldest) + os.linesep
return s
def print_live_refs(*a, **kw):
print format_live_refs(*a, **kw)
def get_oldest(class_name): def get_oldest(class_name):
for cls, wdict in live_refs.iteritems(): for cls, wdict in live_refs.iteritems():