2013-08-16 18:47:56 +02:00
|
|
|
import os
|
2016-06-15 14:48:25 +02:00
|
|
|
import random
|
2013-08-16 18:47:56 +02:00
|
|
|
import time
|
2013-12-18 22:25:42 +07:00
|
|
|
import hashlib
|
|
|
|
import warnings
|
2013-08-16 18:47:56 +02:00
|
|
|
from tempfile import mkdtemp
|
|
|
|
from shutil import rmtree
|
2016-02-15 17:50:47 +03:00
|
|
|
from six.moves.urllib.parse import urlparse
|
|
|
|
from six import BytesIO
|
2013-08-16 18:47:56 +02:00
|
|
|
|
|
|
|
from twisted.trial import unittest
|
|
|
|
from twisted.internet import defer
|
|
|
|
|
2016-02-15 17:50:47 +03:00
|
|
|
from scrapy.pipelines.files import FilesPipeline, FSFilesStore, S3FilesStore
|
2013-08-16 18:47:56 +02:00
|
|
|
from scrapy.item import Item, Field
|
|
|
|
from scrapy.http import Request, Response
|
2013-10-02 18:23:13 -02:00
|
|
|
from scrapy.settings import Settings
|
2015-07-29 17:38:13 +00:00
|
|
|
from scrapy.utils.python import to_bytes
|
2016-02-15 17:50:47 +03:00
|
|
|
from scrapy.utils.test import assert_aws_environ, get_s3_content_and_delete
|
2016-02-18 10:57:02 +03:00
|
|
|
from scrapy.utils.boto import is_botocore
|
2013-08-16 18:47:56 +02:00
|
|
|
|
2015-03-06 15:45:04 +02:00
|
|
|
from tests import mock
|
|
|
|
|
2013-08-16 18:47:56 +02:00
|
|
|
|
|
|
|
def _mocked_download_func(request, info):
|
|
|
|
response = request.meta.get('response')
|
|
|
|
return response() if callable(response) else response
|
|
|
|
|
|
|
|
|
|
|
|
class FilesPipelineTestCase(unittest.TestCase):
    """Exercise FilesPipeline path generation, filesystem storage and
    the file-expiry (re-download) logic."""

    def setUp(self):
        self.tempdir = mkdtemp()
        settings = Settings({'FILES_STORE': self.tempdir})
        self.pipeline = FilesPipeline.from_settings(settings)
        self.pipeline.download_func = _mocked_download_func
        self.pipeline.open_spider(None)

    def tearDown(self):
        rmtree(self.tempdir)

    def test_file_path(self):
        file_path = self.pipeline.file_path
        # (request url, expected storage path) pairs.
        cases = [
            ("https://dev.mydeco.com/mydeco.pdf",
             'full/c9b564df929f4bc635bdd19fde4f3d4847c757c5.pdf'),
            ("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.txt",
             'full/4ce274dd83db0368bafd7e406f382ae088e39219.txt'),
            ("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.doc",
             'full/94ccc495a17b9ac5d40e3eabf3afcb8c2c9b9e1a.doc'),
            ("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg",
             'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg'),
            ("http://www.dorma.co.uk/images/product_details/2532/",
             'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2'),
            ("http://www.dorma.co.uk/images/product_details/2532",
             'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1'),
        ]
        for url, expected in cases:
            self.assertEqual(file_path(Request(url)), expected)
        # Supplying response/info must not change the generated path.
        self.assertEqual(
            file_path(Request("http://www.dorma.co.uk/images/product_details/2532"),
                      response=Response("http://www.dorma.co.uk/images/product_details/2532"),
                      info=object()),
            'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1')

    def test_fs_store(self):
        assert isinstance(self.pipeline.store, FSFilesStore)
        self.assertEqual(self.pipeline.store.basedir, self.tempdir)

        key = 'some/image/key.jpg'
        expected = os.path.join(self.tempdir, 'some', 'image', 'key.jpg')
        self.assertEqual(self.pipeline.store._get_filesystem_path(key), expected)

    @defer.inlineCallbacks
    def test_file_not_expired(self):
        item_url = "http://example.com/file.pdf"
        item = _create_item_with_files(item_url)
        patchers = [
            mock.patch.object(FilesPipeline, 'inc_stats', return_value=True),
            mock.patch.object(FSFilesStore, 'stat_file', return_value={
                'checksum': 'abc', 'last_modified': time.time()}),
            mock.patch.object(FilesPipeline, 'get_media_requests',
                              return_value=[_prepare_request_object(item_url)])
        ]
        for patcher in patchers:
            patcher.start()

        result = yield self.pipeline.process_item(item, None)
        # Fresh file: the cached checksum from stat_file is kept.
        self.assertEqual(result['files'][0]['checksum'], 'abc')

        for patcher in patchers:
            patcher.stop()

    @defer.inlineCallbacks
    def test_file_expired(self):
        item_url = "http://example.com/file2.pdf"
        item = _create_item_with_files(item_url)
        patchers = [
            mock.patch.object(FSFilesStore, 'stat_file', return_value={
                'checksum': 'abc',
                # Twice the expiry window in the past -> file counts as stale.
                'last_modified': time.time() - (self.pipeline.expires * 60 * 60 * 24 * 2)}),
            mock.patch.object(FilesPipeline, 'get_media_requests',
                              return_value=[_prepare_request_object(item_url)]),
            mock.patch.object(FilesPipeline, 'inc_stats', return_value=True)
        ]
        for patcher in patchers:
            patcher.start()

        result = yield self.pipeline.process_item(item, None)
        # Stale file: re-downloaded, so the checksum is recomputed.
        self.assertNotEqual(result['files'][0]['checksum'], 'abc')

        for patcher in patchers:
            patcher.stop()
class DeprecatedFilesPipeline(FilesPipeline):
    """Pipeline that overrides the legacy ``file_key()`` hook
    (the pre-``file_path`` API whose use triggers a deprecation warning)."""

    def file_key(self, url):
        # Same SHA1-of-url naming scheme as the default, but stored
        # under 'empty/' so tests can tell the override was used.
        digest = hashlib.sha1(to_bytes(url)).hexdigest()
        extension = os.path.splitext(url)[1]
        return 'empty/%s%s' % (digest, extension)
class DeprecatedFilesPipelineTestCase(unittest.TestCase):
    """Deprecation behaviour of the legacy ``file_key()`` API."""

    def setUp(self):
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def init_pipeline(self, pipeline_class):
        settings = Settings({'FILES_STORE': self.tempdir})
        self.pipeline = pipeline_class.from_settings(settings)
        self.pipeline.download_func = _mocked_download_func
        self.pipeline.open_spider(None)

    def test_default_file_key_method(self):
        # Calling file_key() directly on the stock pipeline must still
        # work, but emit exactly one deprecation warning.
        self.init_pipeline(FilesPipeline)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            self.assertEqual(self.pipeline.file_key("https://dev.mydeco.com/mydeco.pdf"),
                             'full/c9b564df929f4bc635bdd19fde4f3d4847c757c5.pdf')
            self.assertEqual(len(w), 1)
            self.assertTrue('file_key(url) method is deprecated' in str(w[-1].message))

    def test_overridden_file_key_method(self):
        # file_path() must delegate to an overridden file_key() and warn.
        self.init_pipeline(DeprecatedFilesPipeline)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            self.assertEqual(self.pipeline.file_path(Request("https://dev.mydeco.com/mydeco.pdf")),
                             'empty/c9b564df929f4bc635bdd19fde4f3d4847c757c5.pdf')
            self.assertEqual(len(w), 1)
            self.assertTrue('file_key(url) method is deprecated' in str(w[-1].message))
2013-10-02 18:23:13 -02:00
|
|
|
class FilesPipelineTestCaseFields(unittest.TestCase):
    """Item field names used by the pipeline: built-in defaults and
    the FILES_URLS_FIELD / FILES_RESULT_FIELD setting overrides."""

    def test_item_fields_default(self):
        class TestItem(Item):
            name = Field()
            file_urls = Field()
            files = Field()

        url = 'http://www.example.com/files/1.txt'
        # Both Item subclasses and plain dicts must be supported.
        for item_cls in (TestItem, dict):
            item = item_cls({'name': 'item1', 'file_urls': [url]})
            pipeline = FilesPipeline.from_settings(
                Settings({'FILES_STORE': 's3://example/files/'}))
            requests = list(pipeline.get_media_requests(item, None))
            self.assertEqual(requests[0].url, url)
            results = [(True, {'url': url})]
            pipeline.item_completed(results, item, None)
            self.assertEqual(item['files'], [results[0][1]])

    def test_item_fields_override_settings(self):
        class TestItem(Item):
            name = Field()
            files = Field()
            stored_file = Field()

        url = 'http://www.example.com/files/1.txt'
        for item_cls in (TestItem, dict):
            item = item_cls({'name': 'item1', 'files': [url]})
            pipeline = FilesPipeline.from_settings(Settings({
                'FILES_STORE': 's3://example/files/',
                'FILES_URLS_FIELD': 'files',
                'FILES_RESULT_FIELD': 'stored_file'
            }))
            requests = list(pipeline.get_media_requests(item, None))
            self.assertEqual(requests[0].url, url)
            results = [(True, {'url': url})]
            pipeline.item_completed(results, item, None)
            self.assertEqual(item['stored_file'], [results[0][1]])
2016-03-31 19:19:49 -03:00
|
|
|
class FilesPipelineTestCaseCustomSettings(unittest.TestCase):
|
2016-06-15 14:48:25 +02:00
|
|
|
default_cls_settings = {
|
|
|
|
"EXPIRES": 90,
|
2016-06-20 12:39:09 +02:00
|
|
|
"FILES_URLS_FIELD": "file_urls",
|
|
|
|
"FILES_RESULT_FIELD": "files"
|
2016-06-15 14:48:25 +02:00
|
|
|
}
|
|
|
|
file_cls_attr_settings_map = {
|
2016-06-15 15:39:11 +02:00
|
|
|
("EXPIRES", "FILES_EXPIRES", "expires"),
|
2016-06-20 12:39:09 +02:00
|
|
|
("FILES_URLS_FIELD", "FILES_URLS_FIELD", "files_urls_field"),
|
|
|
|
("FILES_RESULT_FIELD", "FILES_RESULT_FIELD", "files_result_field")
|
2016-06-15 14:48:25 +02:00
|
|
|
}
|
2016-03-31 19:19:49 -03:00
|
|
|
|
|
|
|
def setUp(self):
|
|
|
|
self.tempdir = mkdtemp()
|
|
|
|
|
|
|
|
def tearDown(self):
|
|
|
|
rmtree(self.tempdir)
|
|
|
|
|
2016-06-15 14:48:25 +02:00
|
|
|
def _generate_fake_settings(self, prefix=None):
|
|
|
|
|
|
|
|
def random_string():
|
|
|
|
return "".join([chr(random.randint(97, 123)) for _ in range(10)])
|
|
|
|
|
|
|
|
settings = {
|
2016-12-21 17:03:11 +01:00
|
|
|
"FILES_EXPIRES": random.randint(100, 1000),
|
2016-06-15 14:48:25 +02:00
|
|
|
"FILES_URLS_FIELD": random_string(),
|
|
|
|
"FILES_RESULT_FIELD": random_string(),
|
|
|
|
"FILES_STORE": self.tempdir
|
|
|
|
}
|
|
|
|
if not prefix:
|
|
|
|
return settings
|
|
|
|
|
2016-06-15 15:39:11 +02:00
|
|
|
return {prefix.upper() + "_" + k if k != "FILES_STORE" else k: v for k, v in settings.items()}
|
2016-06-15 14:48:25 +02:00
|
|
|
|
2016-06-15 15:12:18 +02:00
|
|
|
def _generate_fake_pipeline(self):
|
|
|
|
|
|
|
|
class UserDefinedFilePipeline(FilesPipeline):
|
2016-06-20 12:39:09 +02:00
|
|
|
EXPIRES = 1001
|
|
|
|
FILES_URLS_FIELD = "alfa"
|
|
|
|
FILES_RESULT_FIELD = "beta"
|
2016-06-15 15:12:18 +02:00
|
|
|
|
|
|
|
return UserDefinedFilePipeline
|
|
|
|
|
2016-06-15 14:48:25 +02:00
|
|
|
def test_different_settings_for_different_instances(self):
|
2016-06-15 15:12:18 +02:00
|
|
|
"""
|
|
|
|
If there are different instances with different settings they should keep
|
|
|
|
different settings.
|
|
|
|
"""
|
2016-06-15 14:48:25 +02:00
|
|
|
custom_settings = self._generate_fake_settings()
|
|
|
|
another_pipeline = FilesPipeline.from_settings(Settings(custom_settings))
|
|
|
|
one_pipeline = FilesPipeline(self.tempdir)
|
2016-06-15 15:39:11 +02:00
|
|
|
for pipe_attr, settings_attr, pipe_ins_attr in self.file_cls_attr_settings_map:
|
2016-06-15 14:48:25 +02:00
|
|
|
default_value = self.default_cls_settings[pipe_attr]
|
|
|
|
self.assertEqual(getattr(one_pipeline, pipe_attr), default_value)
|
|
|
|
custom_value = custom_settings[settings_attr]
|
2016-06-20 12:39:09 +02:00
|
|
|
self.assertNotEqual(default_value, custom_value)
|
2016-06-15 15:39:11 +02:00
|
|
|
self.assertEqual(getattr(another_pipeline, pipe_ins_attr), custom_value)
|
2016-03-31 19:19:49 -03:00
|
|
|
|
2016-06-15 15:12:18 +02:00
|
|
|
def test_subclass_attributes_preserved_if_no_settings(self):
|
|
|
|
"""
|
|
|
|
If subclasses override class attributes and there are no special settings those values should be kept.
|
|
|
|
"""
|
|
|
|
pipe_cls = self._generate_fake_pipeline()
|
|
|
|
pipe = pipe_cls.from_settings(Settings({"FILES_STORE": self.tempdir}))
|
2016-06-15 15:39:11 +02:00
|
|
|
for pipe_attr, settings_attr, pipe_ins_attr in self.file_cls_attr_settings_map:
|
2016-06-20 12:39:09 +02:00
|
|
|
custom_value = getattr(pipe, pipe_ins_attr)
|
|
|
|
self.assertNotEqual(custom_value, self.default_cls_settings[pipe_attr])
|
2016-06-15 15:39:11 +02:00
|
|
|
self.assertEqual(getattr(pipe, pipe_ins_attr), getattr(pipe, pipe_attr))
|
|
|
|
|
|
|
|
def test_subclass_attrs_preserved_custom_settings(self):
|
|
|
|
"""
|
2016-09-15 09:30:09 +02:00
|
|
|
If file settings are defined but they are not defined for subclass
|
|
|
|
settings should be preserved.
|
2016-06-15 15:39:11 +02:00
|
|
|
"""
|
|
|
|
pipeline_cls = self._generate_fake_pipeline()
|
|
|
|
settings = self._generate_fake_settings()
|
|
|
|
pipeline = pipeline_cls.from_settings(Settings(settings))
|
|
|
|
for pipe_attr, settings_attr, pipe_ins_attr in self.file_cls_attr_settings_map:
|
|
|
|
value = getattr(pipeline, pipe_ins_attr)
|
2016-09-15 09:30:09 +02:00
|
|
|
setting_value = settings.get(settings_attr)
|
2016-06-20 12:39:09 +02:00
|
|
|
self.assertNotEqual(value, self.default_cls_settings[pipe_attr])
|
2016-09-15 09:30:09 +02:00
|
|
|
self.assertEqual(value, setting_value)
|
2016-06-15 15:39:11 +02:00
|
|
|
|
|
|
|
def test_no_custom_settings_for_subclasses(self):
|
|
|
|
"""
|
|
|
|
If there are no settings for subclass and no subclass attributes, pipeline should use
|
|
|
|
attributes of base class.
|
|
|
|
"""
|
|
|
|
class UserDefinedFilesPipeline(FilesPipeline):
|
|
|
|
pass
|
|
|
|
|
|
|
|
user_pipeline = UserDefinedFilesPipeline.from_settings(Settings({"FILES_STORE": self.tempdir}))
|
|
|
|
for pipe_attr, settings_attr, pipe_ins_attr in self.file_cls_attr_settings_map:
|
|
|
|
# Values from settings for custom pipeline should be set on pipeline instance.
|
|
|
|
custom_value = self.default_cls_settings.get(pipe_attr.upper())
|
|
|
|
self.assertEqual(getattr(user_pipeline, pipe_ins_attr), custom_value)
|
|
|
|
|
|
|
|
def test_custom_settings_for_subclasses(self):
|
|
|
|
"""
|
|
|
|
If there are custom settings for subclass and NO class attributes, pipeline should use custom
|
|
|
|
settings.
|
|
|
|
"""
|
|
|
|
class UserDefinedFilesPipeline(FilesPipeline):
|
|
|
|
pass
|
|
|
|
|
|
|
|
prefix = UserDefinedFilesPipeline.__name__.upper()
|
|
|
|
settings = self._generate_fake_settings(prefix=prefix)
|
|
|
|
user_pipeline = UserDefinedFilesPipeline.from_settings(Settings(settings))
|
|
|
|
for pipe_attr, settings_attr, pipe_inst_attr in self.file_cls_attr_settings_map:
|
|
|
|
# Values from settings for custom pipeline should be set on pipeline instance.
|
|
|
|
custom_value = settings.get(prefix + "_" + settings_attr)
|
2016-06-20 12:39:09 +02:00
|
|
|
self.assertNotEqual(custom_value, self.default_cls_settings[pipe_attr])
|
2016-06-15 15:39:11 +02:00
|
|
|
self.assertEqual(getattr(user_pipeline, pipe_inst_attr), custom_value)
|
|
|
|
|
|
|
|
def test_custom_settings_and_class_attrs_for_subclasses(self):
|
|
|
|
"""
|
|
|
|
If there are custom settings for subclass AND class attributes
|
|
|
|
setting keys are preferred and override attributes.
|
|
|
|
"""
|
|
|
|
pipeline_cls = self._generate_fake_pipeline()
|
|
|
|
prefix = pipeline_cls.__name__.upper()
|
|
|
|
settings = self._generate_fake_settings(prefix=prefix)
|
|
|
|
user_pipeline = pipeline_cls.from_settings(Settings(settings))
|
|
|
|
for pipe_cls_attr, settings_attr, pipe_inst_attr in self.file_cls_attr_settings_map:
|
|
|
|
custom_value = settings.get(prefix + "_" + settings_attr)
|
2016-06-20 12:39:09 +02:00
|
|
|
self.assertNotEqual(custom_value, self.default_cls_settings[pipe_cls_attr])
|
2016-06-15 15:39:11 +02:00
|
|
|
self.assertEqual(getattr(user_pipeline, pipe_inst_attr), custom_value)
|
2016-06-15 15:12:18 +02:00
|
|
|
|
2016-06-20 12:39:09 +02:00
|
|
|
def test_cls_attrs_with_DEFAULT_prefix(self):
|
|
|
|
class UserDefinedFilesPipeline(FilesPipeline):
|
|
|
|
DEFAULT_FILES_RESULT_FIELD = "this"
|
|
|
|
DEFAULT_FILES_URLS_FIELD = "that"
|
|
|
|
|
|
|
|
pipeline = UserDefinedFilesPipeline.from_settings(Settings({"FILES_STORE": self.tempdir}))
|
|
|
|
self.assertEqual(pipeline.files_result_field, "this")
|
|
|
|
self.assertEqual(pipeline.files_urls_field, "that")
|
|
|
|
|
2016-03-31 19:19:49 -03:00
|
|
|
|
2016-09-15 09:30:09 +02:00
|
|
|
def test_user_defined_subclass_default_key_names(self):
|
|
|
|
"""Test situation when user defines subclass of FilesPipeline,
|
|
|
|
but uses attribute names for default pipeline (without prefixing
|
|
|
|
them with pipeline class name).
|
|
|
|
"""
|
|
|
|
settings = self._generate_fake_settings()
|
|
|
|
|
|
|
|
class UserPipe(FilesPipeline):
|
|
|
|
pass
|
|
|
|
|
|
|
|
pipeline_cls = UserPipe.from_settings(Settings(settings))
|
|
|
|
|
|
|
|
for pipe_attr, settings_attr, pipe_inst_attr in self.file_cls_attr_settings_map:
|
|
|
|
expected_value = settings.get(settings_attr)
|
|
|
|
self.assertEqual(getattr(pipeline_cls, pipe_inst_attr),
|
|
|
|
expected_value)
|
|
|
|
|
|
|
|
|
2016-02-15 17:50:47 +03:00
|
|
|
class TestS3FilesStore(unittest.TestCase):
    """Round-trip a small payload through S3FilesStore.

    Requires AWS credentials plus an S3_TEST_FILE_URI environment
    variable; skipped otherwise.
    """

    @defer.inlineCallbacks
    def test_persist(self):
        assert_aws_environ()
        uri = os.environ.get('S3_TEST_FILE_URI')
        if not uri:
            raise unittest.SkipTest("No S3 URI available for testing")

        data = b"TestS3FilesStore: \xe2\x98\x83"
        meta = {'foo': 'bar'}
        path = ''
        store = S3FilesStore(uri)
        yield store.persist_file(
            path, BytesIO(data), info=None, meta=meta,
            headers={'Content-Type': 'image/png'})

        stats = yield store.stat_file(path, info=None)
        self.assertIn('last_modified', stats)
        self.assertIn('checksum', stats)
        self.assertEqual(stats['checksum'], '3187896a9657a28163abb31667df64c8')

        parsed = urlparse(uri)
        content, key = get_s3_content_and_delete(
            parsed.hostname, parsed.path[1:], with_key=True)
        self.assertEqual(content, data)
        # botocore and boto expose the stored object's attributes differently.
        if is_botocore():
            self.assertEqual(key['Metadata'], {'foo': 'bar'})
            self.assertEqual(
                key['CacheControl'], S3FilesStore.HEADERS['Cache-Control'])
            self.assertEqual(key['ContentType'], 'image/png')
        else:
            self.assertEqual(key.metadata, {'foo': 'bar'})
            self.assertEqual(
                key.cache_control, S3FilesStore.HEADERS['Cache-Control'])
            self.assertEqual(key.content_type, 'image/png')
2013-08-16 18:47:56 +02:00
|
|
|
# Minimal item declaring the default FilesPipeline field names
# (FILES_URLS_FIELD='file_urls', FILES_RESULT_FIELD='files').
class ItemWithFiles(Item):
    file_urls = Field()
    files = Field()
|
|
|
def _create_item_with_files(*files):
    """Return an ItemWithFiles whose 'file_urls' field holds *files*."""
    result = ItemWithFiles()
    result['file_urls'] = files
    return result
|
|
|
def _prepare_request_object(item_url):
    """Build a Request carrying a canned 200 Response in its meta,
    for use with ``_mocked_download_func``."""
    canned_response = Response(item_url, status=200, body=b'data')
    return Request(item_url, meta={'response': canned_response})
|
# Allow running this test module directly (outside the trial runner).
if __name__ == "__main__":
    unittest.main()