mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 10:43:43 +00:00
some cleanup to memusage and memdebug extensions
This commit is contained in:
parent
f1980a3d9f
commit
45ed662ee5
@ -4,7 +4,6 @@ MemoryDebugger extension
|
||||
See documentation in docs/ref/extensions.rst
|
||||
"""
|
||||
|
||||
import pprint
|
||||
import gc
|
||||
import socket
|
||||
|
||||
@ -14,8 +13,9 @@ from scrapy.xlib.pydispatch import dispatcher
|
||||
from scrapy.core import signals
|
||||
from scrapy.core.exceptions import NotConfigured
|
||||
from scrapy.mail import MailSender
|
||||
from scrapy.extension import extensions
|
||||
from scrapy.stats import stats
|
||||
from scrapy.conf import settings
|
||||
from scrapy.utils.memory import get_vmvalue_from_procfs
|
||||
from scrapy import log
|
||||
|
||||
class MemoryDebugger(object):
|
||||
@ -43,12 +43,13 @@ class MemoryDebugger(object):
|
||||
gc.collect()
|
||||
|
||||
figures = []
|
||||
if 'MemoryUsage' in extensions.enabled:
|
||||
memusage = extensions.enabled['MemoryUsage']
|
||||
memusage.update()
|
||||
figures.append(("Memory usage at startup", int(memusage.data['startup']/1024/1024), "Mb"))
|
||||
figures.append(("Maximum memory usage", int(memusage.data['max']/1024/1024), "Mb"))
|
||||
figures.append(("Memory usage at shutdown", int(memusage.virtual/1024/1024), "Mb"))
|
||||
if stats.get_value('memusage/startup'):
|
||||
figures.append(("Memory usage at startup", \
|
||||
stats.get_value('memusage/startup')/1024/1024, "Mb"))
|
||||
figures.append(("Maximum memory usage", \
|
||||
stats.get_value('memusage/max')/1024/1024, "Mb"))
|
||||
figures.append(("Memory usage at shutdown", \
|
||||
get_vmvalue_from_procfs()/1024/1024, "Mb"))
|
||||
figures.append(("Objects in gc.garbage", len(gc.garbage), ""))
|
||||
figures.append(("libxml2 memory leak", libxml2.debugMemory(1), "bytes"))
|
||||
return figures
|
||||
@ -57,10 +58,11 @@ class MemoryDebugger(object):
|
||||
s = ""
|
||||
s += "SCRAPY MEMORY DEBUGGER RESULTS\n\n"
|
||||
for f in figures:
|
||||
s += "%-30s : %s %s\n" % f
|
||||
s += "%-30s : %d %s\n" % f
|
||||
return s
|
||||
|
||||
def log_or_send_report(self, report):
|
||||
if self.rcpts:
|
||||
self.mail.send(self.rcpts, "Scrapy Memory Debugger results at %s" % socket.gethostname(), report)
|
||||
self.mail.send(self.rcpts, "Scrapy Memory Debugger results at %s" % \
|
||||
socket.gethostname(), report)
|
||||
log.msg(report)
|
||||
|
@ -4,7 +4,6 @@ MemoryUsage extension
|
||||
See documentation in docs/ref/extensions.rst
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import socket
|
||||
|
||||
@ -18,25 +17,18 @@ from scrapy.core.exceptions import NotConfigured
|
||||
from scrapy.mail import MailSender
|
||||
from scrapy.conf import settings
|
||||
from scrapy.stats import stats
|
||||
from scrapy.utils.memory import get_vmvalue_from_procfs
|
||||
|
||||
class MemoryUsage(object):
|
||||
|
||||
_proc_status = '/proc/%d/status' % os.getpid()
|
||||
_scale = {'kB': 1024.0, 'mB': 1024.0*1024.0,
|
||||
'KB': 1024.0, 'MB': 1024.0*1024.0}
|
||||
|
||||
def __init__(self):
|
||||
if not settings.getbool('MEMUSAGE_ENABLED'):
|
||||
raise NotConfigured
|
||||
if sys.platform != 'linux2':
|
||||
raise NotConfigured("MemoryUsage extension is only available on Linux")
|
||||
if not os.path.exists('/proc'):
|
||||
raise NotConfigured
|
||||
|
||||
self.warned = False
|
||||
|
||||
self.data = {}
|
||||
self.data['startup'] = 0
|
||||
self.data['max'] = 0
|
||||
|
||||
scrapyengine.addtask(self.update, 60.0, now=True)
|
||||
|
||||
self.notify_mails = settings.getlist('MEMUSAGE_NOTIFY')
|
||||
@ -56,40 +48,13 @@ class MemoryUsage(object):
|
||||
|
||||
@property
|
||||
def virtual(self):
|
||||
return self._vmvalue('VmSize:')
|
||||
|
||||
@property
|
||||
def resident(self):
|
||||
return self._vmvalue('VmRSS:')
|
||||
|
||||
@property
|
||||
def stacksize(self):
|
||||
return self._vmvalue('VmStk:')
|
||||
return get_vmvalue_from_procfs('VmSize')
|
||||
|
||||
def engine_started(self):
|
||||
self.data['startup'] = self.virtual
|
||||
stats.set_value('memusage/startup', int(self.virtual))
|
||||
stats.set_value('memusage/startup', self.virtual)
|
||||
|
||||
def update(self):
|
||||
if self.virtual > self.data['max']:
|
||||
self.data['max'] = self.virtual
|
||||
stats.set_value('memusage/max', int(self.virtual))
|
||||
|
||||
def _vmvalue(self, VmKey):
|
||||
# get pseudo file /proc/<pid>/status
|
||||
try:
|
||||
t = open(self._proc_status)
|
||||
v = t.read()
|
||||
t.close()
|
||||
except:
|
||||
return 0.0 # non-Linux?
|
||||
# get VmKey line e.g. 'VmRSS: 9999 kB\n ...'
|
||||
i = v.index(VmKey)
|
||||
v = v[i:].split(None, 3) # whitespace
|
||||
if len(v) < 3:
|
||||
return 0.0 # invalid format?
|
||||
# convert Vm value to bytes
|
||||
return float(v[1]) * self._scale[v[2]]
|
||||
stats.max_value('memusage/max', self.virtual)
|
||||
|
||||
def _check_limit(self):
|
||||
if self.virtual > self.limit:
|
||||
@ -119,8 +84,8 @@ class MemoryUsage(object):
|
||||
|
||||
def _send_report(self, rcpts, subject):
|
||||
"""send notification mail with some additional useful info"""
|
||||
s = "Memory usage at engine startup : %dM\r\n" % (self.data['startup']/1024/1024)
|
||||
s += "Maximum memory usage : %dM\r\n" % (self.data['max']/1024/1024)
|
||||
s = "Memory usage at engine startup : %dM\r\n" % (stats.get_value('memusage/startup')/1024/1024)
|
||||
s += "Maximum memory usage : %dM\r\n" % (stats.get_value('memusage/max')/1024/1024)
|
||||
s += "Current memory usage : %dM\r\n" % (self.virtual/1024/1024)
|
||||
|
||||
s += "ENGINE STATUS ------------------------------------------------------- \r\n"
|
||||
|
19
scrapy/tests/test_utils_memory.py
Normal file
19
scrapy/tests/test_utils_memory.py
Normal file
@ -0,0 +1,19 @@
|
||||
import os
|
||||
|
||||
from twisted.trial import unittest
|
||||
|
||||
from scrapy.utils.memory import get_vmvalue_from_procfs
|
||||
|
||||
class UtilsMemoryTestCase(unittest.TestCase):
    """Tests for the helpers in scrapy.utils.memory."""

    def test_get_vmvalue_from_procfs(self):
        """VmSize and VmRSS of the current process are positive, and the
        virtual size is larger than the resident size.

        The test is skipped when there is no /proc directory.
        """
        if not os.path.exists('/proc'):
            raise unittest.SkipTest('/proc filesystem not supported')

        vmsize = get_vmvalue_from_procfs('VmSize')
        vmrss = get_vmvalue_from_procfs('VmRSS')

        # TestCase assertions are used instead of bare ``assert`` statements
        # so the checks still run under ``python -O`` and report the offending
        # values on failure.
        self.assertTrue(vmsize > 0, "VmSize should be positive, got %r" % vmsize)
        self.assertTrue(vmrss > 0, "VmRSS should be positive, got %r" % vmrss)
        self.assertTrue(vmsize > vmrss,
            "VmSize (%r) should be greater than VmRSS (%r)" % (vmsize, vmrss))
|
||||
# When this module is executed as a script, hand control to unittest.main().
if __name__ == "__main__":
    unittest.main()
|
25
scrapy/utils/memory.py
Normal file
25
scrapy/utils/memory.py
Normal file
@ -0,0 +1,25 @@
|
||||
import os
|
||||
|
||||
# Multipliers that convert the unit suffix found in /proc/<pid>/status lines
# (e.g. 'kB') into bytes.
_vmvalue_scale = {'kB': 1024, 'mB': 1024*1024, 'KB': 1024, 'MB': 1024*1024}


def get_vmvalue_from_procfs(vmkey='VmSize', pid=None):
    """Return a virtual memory value (in bytes) for the given pid, read from
    the /proc filesystem. If pid is not given, it defaults to the pid of the
    current process.

    Available keys are: VmSize (default), VmRSS, VmStk

    The value is returned as a float. 0.0 is returned when the line for the
    key does not carry both a number and a unit.

    Raises RuntimeError if /proc/<pid>/status cannot be opened, ValueError if
    no line for the key is found, and KeyError if the unit is not one of the
    known suffixes.
    """
    if pid is None:
        pid = os.getpid()
    try:
        status_file = open('/proc/%d/status' % pid)
    except IOError:
        raise RuntimeError("/proc filesystem not supported")
    # try/finally guarantees the file is closed even if read() fails
    try:
        status = status_file.read()
    finally:
        status_file.close()

    # Look for the vmkey line, e.g. 'VmRSS:     9999 kB'. Parsing line by line
    # keeps a malformed entry from borrowing tokens from the following line
    # and prevents the key from matching in the middle of another line.
    prefix = vmkey + ':'
    for line in status.splitlines():
        if not line.startswith(prefix):
            continue
        fields = line.split()
        if len(fields) < 3:
            return 0.0  # invalid format?
        # convert Vm value to bytes
        return float(fields[1]) * _vmvalue_scale[fields[2]]
    raise ValueError("%r not found in /proc/%d/status" % (vmkey, pid))
|
||||
|
Loading…
x
Reference in New Issue
Block a user