mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-22 00:53:24 +00:00
implemented CACHE2_EXPIRATION_SECS and migrated sha to hashlib
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4080
This commit is contained in:
parent
b45d87d0fe
commit
d2121141a3
# Network settings.
DOWNLOAD_TIMEOUT = 180 # 3mins

# Concurrency limits for the crawler.
CONCURRENT_DOMAINS = 8 # number of domains to scrape in parallel
REQUESTS_PER_DOMAIN = 8 # max simultaneous requests per domain

# Cache lifetime: how long a stored response is still considered valid.
CACHE2_EXPIRATION_SECS = 48 * 60 * 60 # seconds while cached response is still valid

# Logging settings.
LOG_ENABLED = True # presumably the master on/off switch for logging — confirm against log module
LOGLEVEL = 'DEBUG' # default loglevel
|
@ -111,7 +111,12 @@ class Cache(object):
|
||||
|
||||
def is_cached(self, domain, key):
    """Return True if a still-valid cached response exists for (domain, key).

    A cache entry is valid while its stored 'timestamp' is at most
    CACHE2_EXPIRATION_SECS seconds in the past; missing entries and
    expired entries both report False.
    """
    requestpath = self.requestpath(domain, key)
    # Guard clause: no cache directory means no cached response at all.
    if not os.path.exists(requestpath):
        return False
    # NOTE(review): eval() on the metadata file executes arbitrary code if
    # the cache directory is writable by untrusted parties; prefer
    # ast.literal_eval or json for deserializing this data.
    with open(os.path.join(requestpath, 'meta_data')) as f:
        metadata = eval(f.read())
    # Entry is valid while now <= stored timestamp + configured lifetime.
    lifetime = datetime.timedelta(seconds=settings.getint('CACHE2_EXPIRATION_SECS'))
    return datetime.datetime.now() <= metadata['timestamp'] + lifetime
|
||||
|
||||
def retrieve_response(self, domain, key):
|
||||
"""
|
||||
|
@ -1,6 +1,6 @@
|
||||
import urllib
|
||||
import warnings
|
||||
from sha import sha
|
||||
from hashlib import sha1
|
||||
from copy import copy
|
||||
from base64 import urlsafe_b64encode
|
||||
|
||||
@ -149,7 +149,8 @@ class Request(object):
|
||||
headers = dict([(k, v) for k, v in self.headers.items() if k.lower() not in keys])
|
||||
|
||||
# fingerprint generation
|
||||
fp = sha(canonicalize(self.url))
|
||||
fp = sha1()
|
||||
fp.update(canonicalize(self.url))
|
||||
fp.update(self.method)
|
||||
|
||||
if self.body and self.method in ['POST', 'PUT']:
|
||||
|
Loading…
x
Reference in New Issue
Block a user