1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 10:43:43 +00:00

some cleanup to memusage and memdebug extensions

This commit is contained in:
Pablo Hoffman 2009-08-17 09:41:02 -03:00
parent f1980a3d9f
commit 45ed662ee5
4 changed files with 64 additions and 53 deletions

View File

@ -4,7 +4,6 @@ MemoryDebugger extension
See documentation in docs/ref/extensions.rst
"""
import pprint
import gc
import socket
@ -14,8 +13,9 @@ from scrapy.xlib.pydispatch import dispatcher
from scrapy.core import signals
from scrapy.core.exceptions import NotConfigured
from scrapy.mail import MailSender
from scrapy.extension import extensions
from scrapy.stats import stats
from scrapy.conf import settings
from scrapy.utils.memory import get_vmvalue_from_procfs
from scrapy import log
class MemoryDebugger(object):
@ -43,12 +43,13 @@ class MemoryDebugger(object):
gc.collect()
figures = []
if 'MemoryUsage' in extensions.enabled:
memusage = extensions.enabled['MemoryUsage']
memusage.update()
figures.append(("Memory usage at startup", int(memusage.data['startup']/1024/1024), "Mb"))
figures.append(("Maximum memory usage", int(memusage.data['max']/1024/1024), "Mb"))
figures.append(("Memory usage at shutdown", int(memusage.virtual/1024/1024), "Mb"))
if stats.get_value('memusage/startup'):
figures.append(("Memory usage at startup", \
stats.get_value('memusage/startup')/1024/1024, "Mb"))
figures.append(("Maximum memory usage", \
stats.get_value('memusage/max')/1024/1024, "Mb"))
figures.append(("Memory usage at shutdown", \
get_vmvalue_from_procfs()/1024/1024, "Mb"))
figures.append(("Objects in gc.garbage", len(gc.garbage), ""))
figures.append(("libxml2 memory leak", libxml2.debugMemory(1), "bytes"))
return figures
@ -57,10 +58,11 @@ class MemoryDebugger(object):
s = ""
s += "SCRAPY MEMORY DEBUGGER RESULTS\n\n"
for f in figures:
s += "%-30s : %s %s\n" % f
s += "%-30s : %d %s\n" % f
return s
def log_or_send_report(self, report):
if self.rcpts:
self.mail.send(self.rcpts, "Scrapy Memory Debugger results at %s" % socket.gethostname(), report)
self.mail.send(self.rcpts, "Scrapy Memory Debugger results at %s" % \
socket.gethostname(), report)
log.msg(report)

View File

@ -4,7 +4,6 @@ MemoryUsage extension
See documentation in docs/ref/extensions.rst
"""
import sys
import os
import socket
@ -18,25 +17,18 @@ from scrapy.core.exceptions import NotConfigured
from scrapy.mail import MailSender
from scrapy.conf import settings
from scrapy.stats import stats
from scrapy.utils.memory import get_vmvalue_from_procfs
class MemoryUsage(object):
_proc_status = '/proc/%d/status' % os.getpid()
_scale = {'kB': 1024.0, 'mB': 1024.0*1024.0,
'KB': 1024.0, 'MB': 1024.0*1024.0}
def __init__(self):
if not settings.getbool('MEMUSAGE_ENABLED'):
raise NotConfigured
if sys.platform != 'linux2':
raise NotConfigured("MemoryUsage extension is only available on Linux")
if not os.path.exists('/proc'):
raise NotConfigured
self.warned = False
self.data = {}
self.data['startup'] = 0
self.data['max'] = 0
scrapyengine.addtask(self.update, 60.0, now=True)
self.notify_mails = settings.getlist('MEMUSAGE_NOTIFY')
@ -56,40 +48,13 @@ class MemoryUsage(object):
@property
def virtual(self):
return self._vmvalue('VmSize:')
@property
def resident(self):
return self._vmvalue('VmRSS:')
@property
def stacksize(self):
return self._vmvalue('VmStk:')
return get_vmvalue_from_procfs('VmSize')
def engine_started(self):
self.data['startup'] = self.virtual
stats.set_value('memusage/startup', int(self.virtual))
stats.set_value('memusage/startup', self.virtual)
def update(self):
if self.virtual > self.data['max']:
self.data['max'] = self.virtual
stats.set_value('memusage/max', int(self.virtual))
def _vmvalue(self, VmKey):
# get pseudo file /proc/<pid>/status
try:
t = open(self._proc_status)
v = t.read()
t.close()
except:
return 0.0 # non-Linux?
# get VmKey line e.g. 'VmRSS: 9999 kB\n ...'
i = v.index(VmKey)
v = v[i:].split(None, 3) # whitespace
if len(v) < 3:
return 0.0 # invalid format?
# convert Vm value to bytes
return float(v[1]) * self._scale[v[2]]
stats.max_value('memusage/max', self.virtual)
def _check_limit(self):
if self.virtual > self.limit:
@ -119,8 +84,8 @@ class MemoryUsage(object):
def _send_report(self, rcpts, subject):
"""send notification mail with some additional useful info"""
s = "Memory usage at engine startup : %dM\r\n" % (self.data['startup']/1024/1024)
s += "Maximum memory usage : %dM\r\n" % (self.data['max']/1024/1024)
s = "Memory usage at engine startup : %dM\r\n" % (stats.get_value('memusage/startup')/1024/1024)
s += "Maximum memory usage : %dM\r\n" % (stats.get_value('memusage/max')/1024/1024)
s += "Current memory usage : %dM\r\n" % (self.virtual/1024/1024)
s += "ENGINE STATUS ------------------------------------------------------- \r\n"

View File

@ -0,0 +1,19 @@
import os
from twisted.trial import unittest
from scrapy.utils.memory import get_vmvalue_from_procfs
class UtilsMemoryTestCase(unittest.TestCase):
    """Tests for the helpers in scrapy.utils.memory."""

    def test_get_vmvalue_from_procfs(self):
        """Check that VmSize and VmRSS of the current process look sane.

        The test is skipped when there is no /proc directory, since
        get_vmvalue_from_procfs reads its values from /proc/<pid>/status.
        """
        if not os.path.exists('/proc'):
            raise unittest.SkipTest('/proc filesystem not supported')
        vmsize = get_vmvalue_from_procfs('VmSize')
        vmrss = get_vmvalue_from_procfs('VmRSS')
        # Use TestCase assertions instead of bare ``assert`` statements: they
        # are not stripped when Python runs with -O and they report the
        # offending values when the check fails.
        self.assertTrue(vmsize > 0, "VmSize should be positive, got %r" % vmsize)
        self.assertTrue(vmrss > 0, "VmRSS should be positive, got %r" % vmrss)
        self.assertTrue(vmsize > vmrss,
                        "VmSize (%r) should be greater than VmRSS (%r)"
                        % (vmsize, vmrss))
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

25
scrapy/utils/memory.py Normal file
View File

@ -0,0 +1,25 @@
import os
# Multipliers that convert the unit suffix used in /proc/<pid>/status
# (e.g. "kB") into a number of bytes.
_vmvalue_scale = {'kB': 1024, 'mB': 1024*1024, 'KB': 1024, 'MB': 1024*1024}


def get_vmvalue_from_procfs(vmkey='VmSize', pid=None):
    """Return a virtual memory value (in bytes) for the given pid using the
    /proc filesystem.

    The value is read from ``/proc/<pid>/status``. If ``pid`` is not given, it
    defaults to the pid of the current process.

    ``vmkey`` is the name of the field to read, without the trailing colon.
    Available keys are: VmSize (default), VmRSS, VmStk.

    Returns the value as a float number of bytes, or 0 if the line for
    ``vmkey`` does not have the ``<key>: <number> <unit>`` layout.

    Raises RuntimeError if the status file cannot be opened, ValueError if no
    line starts with ``vmkey`` (or its value is not a number) and KeyError if
    the unit suffix is not a known one.
    """
    if pid is None:
        pid = os.getpid()
    try:
        status_file = open('/proc/%d/status' % pid)
    except IOError:
        raise RuntimeError("/proc filesystem not supported")
    # try/finally makes sure the file is closed even when read() fails
    try:
        status = status_file.read()
    finally:
        status_file.close()

    # Look for the key at the start of a line and parse that line only, e.g.
    # 'VmRSS:     9999 kB'. Searching the whole text could match the key
    # inside another field's value, and splitting past the end of the line
    # would pick up tokens that belong to the following field.
    prefix = vmkey + ':'
    for line in status.splitlines():
        if not line.startswith(prefix):
            continue
        fields = line.split()
        if len(fields) < 3:
            return 0.0  # invalid format (e.g. a field without a unit)
        # convert Vm value to bytes
        return float(fields[1]) * _vmvalue_scale[fields[2]]
    raise ValueError("%r not found in /proc/%d/status" % (vmkey, pid))