mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 15:24:15 +00:00
Merge pull request #201 from alexcepoi/test-fixes-mac
improve mac os compatibility
This commit is contained in:
commit
9c9a18b3a3
@ -35,7 +35,7 @@ vsftpd_log_file=/dev/null
|
||||
vsftpd_pid=$!
|
||||
fi
|
||||
|
||||
find -name '*.py[co]' -delete
|
||||
find . -name '*.py[co]' -delete
|
||||
if [ $# -eq 0 ]; then
|
||||
$trial --reporter=text scrapy scrapyd
|
||||
else
|
||||
|
@ -1,11 +1,16 @@
|
||||
import unittest, tempfile, shutil, time
|
||||
import time
|
||||
import tempfile
|
||||
import shutil
|
||||
import unittest
|
||||
from contextlib import contextmanager
|
||||
|
||||
from scrapy.http import Response, HtmlResponse, Request
|
||||
from scrapy.spider import BaseSpider
|
||||
from scrapy.contrib.downloadermiddleware.httpcache import FilesystemCacheStorage, HttpCacheMiddleware
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.exceptions import IgnoreRequest
|
||||
from scrapy.utils.test import get_crawler
|
||||
from scrapy.contrib.downloadermiddleware.httpcache import \
|
||||
FilesystemCacheStorage, HttpCacheMiddleware
|
||||
|
||||
|
||||
class HttpCacheMiddlewareTest(unittest.TestCase):
|
||||
@ -16,8 +21,10 @@ class HttpCacheMiddlewareTest(unittest.TestCase):
|
||||
self.crawler = get_crawler()
|
||||
self.spider = BaseSpider('example.com')
|
||||
self.tmpdir = tempfile.mkdtemp()
|
||||
self.request = Request('http://www.example.com', headers={'User-Agent': 'test'})
|
||||
self.response = Response('http://www.example.com', headers={'Content-Type': 'text/html'}, body='test body', status=202)
|
||||
self.request = Request('http://www.example.com',
|
||||
headers={'User-Agent': 'test'})
|
||||
self.response = Response('http://www.example.com', headers=
|
||||
{'Content-Type': 'text/html'}, body='test body', status=202)
|
||||
self.crawler.stats.open_spider(self.spider)
|
||||
|
||||
def tearDown(self):
|
||||
@ -34,113 +41,135 @@ class HttpCacheMiddlewareTest(unittest.TestCase):
|
||||
settings.update(new_settings)
|
||||
return Settings(settings)
|
||||
|
||||
def _get_storage(self, **new_settings):
|
||||
return self.storage_class(self._get_settings(**new_settings))
|
||||
@contextmanager
|
||||
def _storage(self, **new_settings):
|
||||
settings = self._get_settings(**new_settings)
|
||||
storage = self.storage_class(settings)
|
||||
storage.open_spider(self.spider)
|
||||
try:
|
||||
yield storage
|
||||
finally:
|
||||
storage.close_spider(self.spider)
|
||||
|
||||
def _get_middleware(self, **new_settings):
|
||||
mw = HttpCacheMiddleware(self._get_settings(**new_settings), self.crawler.stats)
|
||||
@contextmanager
|
||||
def _middleware(self, **new_settings):
|
||||
settings = self._get_settings(**new_settings)
|
||||
mw = HttpCacheMiddleware(settings, self.crawler.stats)
|
||||
mw.spider_opened(self.spider)
|
||||
return mw
|
||||
try:
|
||||
yield mw
|
||||
finally:
|
||||
mw.spider_closed(self.spider)
|
||||
|
||||
def test_storage(self):
|
||||
storage = self._get_storage()
|
||||
request2 = self.request.copy()
|
||||
assert storage.retrieve_response(self.spider, request2) is None
|
||||
storage.store_response(self.spider, self.request, self.response)
|
||||
response2 = storage.retrieve_response(self.spider, request2)
|
||||
assert isinstance(response2, HtmlResponse) # inferred from content-type header
|
||||
self.assertEqualResponse(self.response, response2)
|
||||
time.sleep(2) # wait for cache to expire
|
||||
assert storage.retrieve_response(self.spider, request2) is None
|
||||
with self._storage() as storage:
|
||||
request2 = self.request.copy()
|
||||
assert storage.retrieve_response(self.spider, request2) is None
|
||||
|
||||
storage.store_response(self.spider, self.request, self.response)
|
||||
response2 = storage.retrieve_response(self.spider, request2)
|
||||
assert isinstance(response2, HtmlResponse) # content-type header
|
||||
self.assertEqualResponse(self.response, response2)
|
||||
|
||||
time.sleep(2) # wait for cache to expire
|
||||
assert storage.retrieve_response(self.spider, request2) is None
|
||||
|
||||
def test_storage_never_expire(self):
|
||||
storage = self._get_storage(HTTPCACHE_EXPIRATION_SECS=0)
|
||||
assert storage.retrieve_response(self.spider, self.request) is None
|
||||
storage.store_response(self.spider, self.request, self.response)
|
||||
time.sleep(0.5) # give the chance to expire
|
||||
assert storage.retrieve_response(self.spider, self.request)
|
||||
with self._storage(HTTPCACHE_EXPIRATION_SECS=0) as storage:
|
||||
assert storage.retrieve_response(self.spider, self.request) is None
|
||||
storage.store_response(self.spider, self.request, self.response)
|
||||
time.sleep(0.5) # give the chance to expire
|
||||
assert storage.retrieve_response(self.spider, self.request)
|
||||
|
||||
def test_middleware(self):
|
||||
mw = self._get_middleware()
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
with self._middleware() as mw:
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
|
||||
def test_different_request_response_urls(self):
|
||||
mw = self._get_middleware()
|
||||
req = Request('http://host.com/path')
|
||||
res = Response('http://host2.net/test.html')
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
cached = mw.process_request(req, self.spider)
|
||||
assert isinstance(cached, Response)
|
||||
self.assertEqualResponse(res, cached)
|
||||
assert 'cached' in cached.flags
|
||||
with self._middleware() as mw:
|
||||
req = Request('http://host.com/path')
|
||||
res = Response('http://host2.net/test.html')
|
||||
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
|
||||
cached = mw.process_request(req, self.spider)
|
||||
assert isinstance(cached, Response)
|
||||
self.assertEqualResponse(res, cached)
|
||||
assert 'cached' in cached.flags
|
||||
|
||||
def test_middleware_ignore_missing(self):
|
||||
mw = self._get_middleware(HTTPCACHE_IGNORE_MISSING=True)
|
||||
self.assertRaises(IgnoreRequest, mw.process_request, self.request, self.spider)
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
with self._middleware(HTTPCACHE_IGNORE_MISSING=True) as mw:
|
||||
self.assertRaises(IgnoreRequest, mw.process_request, self.request, self.spider)
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
|
||||
def test_middleware_ignore_schemes(self):
|
||||
# http responses are cached by default
|
||||
req, res = Request('http://test.com/'), Response('http://test.com/')
|
||||
mw = self._get_middleware()
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
cached = mw.process_request(req, self.spider)
|
||||
assert isinstance(cached, Response), type(cached)
|
||||
self.assertEqualResponse(res, cached)
|
||||
assert 'cached' in cached.flags
|
||||
with self._middleware() as mw:
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
|
||||
cached = mw.process_request(req, self.spider)
|
||||
assert isinstance(cached, Response), type(cached)
|
||||
self.assertEqualResponse(res, cached)
|
||||
assert 'cached' in cached.flags
|
||||
|
||||
# file response is not cached by default
|
||||
req, res = Request('file:///tmp/t.txt'), Response('file:///tmp/t.txt')
|
||||
mw = self._get_middleware()
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
assert mw.storage.retrieve_response(self.spider, req) is None
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
with self._middleware() as mw:
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
|
||||
assert mw.storage.retrieve_response(self.spider, req) is None
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
|
||||
# s3 scheme response is cached by default
|
||||
req, res = Request('s3://bucket/key'), Response('http://bucket/key')
|
||||
mw = self._get_middleware()
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
cached = mw.process_request(req, self.spider)
|
||||
assert isinstance(cached, Response), type(cached)
|
||||
self.assertEqualResponse(res, cached)
|
||||
assert 'cached' in cached.flags
|
||||
with self._middleware() as mw:
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
|
||||
cached = mw.process_request(req, self.spider)
|
||||
assert isinstance(cached, Response), type(cached)
|
||||
self.assertEqualResponse(res, cached)
|
||||
assert 'cached' in cached.flags
|
||||
|
||||
# ignore s3 scheme
|
||||
req, res = Request('s3://bucket/key2'), Response('http://bucket/key2')
|
||||
mw = self._get_middleware(HTTPCACHE_IGNORE_SCHEMES=['s3'])
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
assert mw.storage.retrieve_response(self.spider, req) is None
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
with self._middleware(HTTPCACHE_IGNORE_SCHEMES=['s3']) as mw:
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
mw.process_response(req, res, self.spider)
|
||||
|
||||
assert mw.storage.retrieve_response(self.spider, req) is None
|
||||
assert mw.process_request(req, self.spider) is None
|
||||
|
||||
def test_middleware_ignore_http_codes(self):
|
||||
# test response is not cached
|
||||
mw = self._get_middleware(HTTPCACHE_IGNORE_HTTP_CODES=[202])
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
assert mw.storage.retrieve_response(self.spider, self.request) is None
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
with self._middleware(HTTPCACHE_IGNORE_HTTP_CODES=[202]) as mw:
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
|
||||
assert mw.storage.retrieve_response(self.spider, self.request) is None
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
|
||||
# test response is cached
|
||||
mw = self._get_middleware(HTTPCACHE_IGNORE_HTTP_CODES=[203])
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
with self._middleware(HTTPCACHE_IGNORE_HTTP_CODES=[203]) as mw:
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
|
||||
def assertEqualResponse(self, response1, response2):
|
||||
self.assertEqual(response1.url, response2.url)
|
||||
@ -150,4 +179,3 @@ class HttpCacheMiddlewareTest(unittest.TestCase):
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
|
@ -8,9 +8,11 @@ from twisted.trial import unittest
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
skip = False
|
||||
except ImportError, e:
|
||||
skip = True
|
||||
else:
|
||||
encoders = set(('jpeg_encoder', 'jpeg_decoder'))
|
||||
skip = not encoders.issubset(set(Image.core.__dict__))
|
||||
|
||||
def _mocked_download_func(request, info):
|
||||
response = request.meta.get('response')
|
||||
|
@ -45,7 +45,7 @@ class FifoDiskQueue(object):
|
||||
self.chunksize = self.info['chunksize']
|
||||
self.headf = self._openchunk(self.info['head'][0], 'ab+')
|
||||
self.tailf = self._openchunk(self.info['tail'][0])
|
||||
self.tailf.seek(self.info['tail'][2])
|
||||
os.lseek(self.tailf.fileno(), self.info['tail'][2], os.SEEK_SET)
|
||||
|
||||
def push(self, string):
|
||||
hnum, hpos = self.info['head']
|
||||
|
Loading…
x
Reference in New Issue
Block a user