1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 18:23:57 +00:00

cache: read metadata only when looking for cached items. refs #61

thanks Patrick Mezard for patch.

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40849
This commit is contained in:
Daniel Grana 2009-02-12 08:00:07 +00:00
parent d68646422d
commit 2b14251510

View File

@ -1,5 +1,6 @@
from __future__ import with_statement
import errno
import os
import hashlib
import datetime
@ -111,36 +112,37 @@ class Cache(object):
if not os.path.exists(linkpath):
os.makedirs(linkpath)
def is_cached(self, domain, key):
def read_meta(self, domain, key):
"""Return the metadata dictionary (possibly empty) if the entry is
cached, None otherwise.
"""
requestpath = self.requestpath(domain, key)
if os.path.exists(requestpath):
try:
with open(os.path.join(requestpath, 'pickled_meta'), 'r') as f:
metadata = pickle.load(f)
except IOError, e:
if e.errno != errno.ENOENT:
raise
return None
expiration_secs = settings.getint('CACHE2_EXPIRATION_SECS')
if expiration_secs >= 0:
if datetime.datetime.utcnow() <= metadata['timestamp'] + datetime.timedelta(seconds=expiration_secs):
return True
else:
expiration_date = metadata['timestamp'] + datetime.timedelta(seconds=expiration_secs)
if datetime.datetime.utcnow() > expiration_date:
log.msg('dropping old cached response from %s' % metadata['timestamp'], level=log.DEBUG)
return False
else:
# disabled cache expiration
return True
else:
return False
return None
return metadata
def retrieve_response(self, domain, key):
"""
Return response dictionary if request has correspondent cache record;
return None if not.
"""
if not self.is_cached(domain, key):
metadata = self.read_meta(domain, key)
if metadata is None:
return None # not cached
requestpath = self.requestpath(domain, key)
metadata = responsebody = responseheaders = None
with open(os.path.join(requestpath, 'pickled_meta'), 'r') as f:
metadata = pickle.load(f)
responsebody = responseheaders = None
with open(os.path.join(requestpath, 'response_body')) as f:
responsebody = f.read()
with open(os.path.join(requestpath, 'response_headers')) as f: