1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-22 00:53:24 +00:00

implemented CACHE2_EXPIRATION_SECS and migrated sha to hashlib

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4080
This commit is contained in:
samus_ 2008-07-21 13:23:23 +00:00
parent b45d87d0fe
commit d2121141a3
3 changed files with 10 additions and 3 deletions

View File

@ -11,6 +11,7 @@ USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
DOWNLOAD_TIMEOUT = 180 # 3mins
CONCURRENT_DOMAINS = 8 # number of domains to scrape in parallel
REQUESTS_PER_DOMAIN = 8 # max simultaneous requests per domain
CACHE2_EXPIRATION_SECS = 48 * 60 * 60 # seconds while cached response is still valid
LOG_ENABLED = True # presumably toggles logging globally — confirm against log setup code
LOGLEVEL = 'DEBUG' # default loglevel

View File

@ -111,7 +111,12 @@ class Cache(object):
def is_cached(self, domain, key):
    """Return True if a cached response for (domain, key) exists and has
    not yet expired.

    A cache entry is considered valid while its stored ``timestamp`` plus
    ``CACHE2_EXPIRATION_SECS`` is still in the future; missing entries
    report False.
    """
    requestpath = self.requestpath(domain, key)
    # No directory on disk means the response was never cached.
    if not os.path.exists(requestpath):
        return False
    # SECURITY NOTE(review): eval() on file contents — acceptable only
    # because 'meta_data' is written by this same local cache; never
    # point the cache at untrusted storage.
    with open(os.path.join(requestpath, 'meta_data')) as f:
        metadata = eval(f.read())
    expiration = datetime.timedelta(seconds=settings.getint('CACHE2_EXPIRATION_SECS'))
    return datetime.datetime.now() <= metadata['timestamp'] + expiration
def retrieve_response(self, domain, key):
"""

View File

@ -1,6 +1,6 @@
import urllib
import warnings
from sha import sha
from hashlib import sha1
from copy import copy
from base64 import urlsafe_b64encode
@ -149,7 +149,8 @@ class Request(object):
headers = dict([(k, v) for k, v in self.headers.items() if k.lower() not in keys])
# fingerprint generation
fp = sha(canonicalize(self.url))
fp = sha1()
fp.update(canonicalize(self.url))
fp.update(self.method)
if self.body and self.method in ['POST', 'PUT']: