mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 17:43:57 +00:00

added pickled_meta to avoid using eval

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4084
Author: samus_
Date: 2008-07-22 11:51:53 +00:00
Parent: 76a9d2da10
Commit: 97b2e7df55
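
The point of the change: the cache previously wrote its metadata as repr(dict) and recovered it with eval(), which evaluates whatever text happens to be in the file. The patch adds a second, pickled copy of the metadata and loads that instead. A minimal standalone sketch of the two round-trips (Python 2, to match the code below; the sample dict and the example.com URL are illustrative, the file names match the patch):

import cPickle
import datetime

metadata = {'url': 'http://example.com/', 'timestamp': datetime.datetime.utcnow()}

# Old scheme: write repr() and recover with eval(). eval() runs arbitrary
# expressions, so a corrupted or tampered cache file gets executed.
with open('meta_data', 'w') as f:
    f.write(repr(metadata))
with open('meta_data') as f:
    recovered_by_eval = eval(f.read())

# New scheme: serialize with cPickle and load it back without eval().
with open('pickled_meta', 'wb') as f:
    cPickle.dump(metadata, f, -1)  # -1 selects the highest pickle protocol
with open('pickled_meta', 'rb') as f:
    recovered_by_pickle = cPickle.load(f)

assert recovered_by_eval == recovered_by_pickle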


@@ -4,6 +4,7 @@ import os
 import hashlib
 import datetime
 import urlparse
+import cPickle
 
 from pydispatch import dispatcher
 from twisted.internet import defer
@@ -110,14 +111,14 @@ class Cache(object):
             os.makedirs(linkpath)
 
     def is_cached(self, domain, key):
-        requestpath = self.requestpath(domain, key)
+        pickled_meta = os.path.join(self.requestpath(domain, key), 'pickled_meta')
         if os.path.exists(requestpath):
-            with open(os.path.join(requestpath, 'meta_data')) as f:
-                metadata = eval(f.read())
-            if datetime.datetime.now() <= metadata['timestamp'] + datetime.timedelta(seconds=settings.getint('CACHE2_EXPIRATION_SECS')):
+            with open(pickled_meta) as f:
+                metadata = cPickle.load(f)
+            if datetime.datetime.utcnow() <= metadata['timestamp'] + datetime.timedelta(seconds=settings.getint('CACHE2_EXPIRATION_SECS')):
                 return True
             else:
-                log.msg('dropping old cached response from %s' % metadata['timestamp'], log.INFO)
+                log.msg('dropping old cached response from %s' % metadata['timestamp'])
                 return False
         else:
             return False
@@ -132,14 +133,13 @@ class Cache(object):
             return None # not cached
 
         metadata = responsebody = responseheaders = None
-        with open(os.path.join(requestpath, 'meta_data')) as f:
-            metadata = f.read()
+        with open(os.path.join(requestpath, 'pickled_meta')) as f:
+            metadata = cPickle.load(f)
         with open(os.path.join(requestpath, 'response_body')) as f:
             responsebody = f.read()
         with open(os.path.join(requestpath, 'response_headers')) as f:
             responseheaders = f.read()
-        metadata = eval(metadata)
 
         url = metadata['url']
         original_url = metadata.get('original_url', url)
         headers = Headers(responseheaders)
@@ -162,12 +162,15 @@ class Cache(object):
             'status': response.status,
             'domain': response.domain,
             'original_url': response.original_url,
-            'timestamp': datetime.datetime.now(),
+            'timestamp': datetime.datetime.utcnow(),
         }
         # metadata
         with open(os.path.join(requestpath, 'meta_data'), 'w') as f:
             f.write(repr(metadata))
+        # pickled metadata (to recover without using eval)
+        with open(os.path.join(requestpath, 'pickled_meta'), 'wb') as f:
+            cPickle.dump(metadata, f, -1)
 
         # response
         with open(os.path.join(requestpath, 'response_headers'), 'w') as f:
             f.write(headers_dict_to_raw(response.headers))
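
On the storage side the patch still writes the old repr() metadata file and adds the pickled copy next to it, while retrieval now reads only the pickle. A condensed sketch of that write/read pairing (the store_meta/load_meta helper names are illustrative; the file names and the protocol argument match the patch):

import cPickle
import os

def store_meta(requestpath, metadata):
    # Plain-text copy, as before.
    with open(os.path.join(requestpath, 'meta_data'), 'w') as f:
        f.write(repr(metadata))
    # Pickled copy, written in binary mode with the highest protocol.
    with open(os.path.join(requestpath, 'pickled_meta'), 'wb') as f:
        cPickle.dump(metadata, f, -1)

def load_meta(requestpath):
    with open(os.path.join(requestpath, 'pickled_meta'), 'rb') as f:
        return cPickle.load(f)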