2009-10-07 22:34:38 -02:00
|
|
|
import os
|
2013-12-18 22:44:04 +07:00
|
|
|
import hashlib
|
|
|
|
import warnings
|
2014-07-15 21:26:01 +08:00
|
|
|
from tempfile import mkdtemp, TemporaryFile
|
2010-09-06 11:04:27 -03:00
|
|
|
from shutil import rmtree
|
|
|
|
|
2009-04-11 05:05:58 +00:00
|
|
|
from twisted.trial import unittest
|
2010-09-06 11:04:27 -03:00
|
|
|
|
2013-10-02 18:23:13 -02:00
|
|
|
from scrapy.item import Item, Field
|
2013-12-18 23:09:37 +07:00
|
|
|
from scrapy.http import Request, Response
|
2013-10-02 18:23:13 -02:00
|
|
|
from scrapy.settings import Settings
|
2015-04-20 23:43:38 -03:00
|
|
|
from scrapy.pipelines.images import ImagesPipeline
|
2015-07-29 17:48:27 +00:00
|
|
|
from scrapy.utils.python import to_bytes
|
2009-09-02 01:31:08 -03:00
|
|
|
|
2013-06-20 10:49:14 -03:00
|
|
|
# Decide at import time whether the image tests can run. ``skip`` stays False
# when PIL imports and exposes both JPEG codecs; otherwise it holds a
# human-readable reason, which the test case below re-exports as its own
# ``skip`` class attribute.
skip = False
try:
    from PIL import Image
except ImportError:
    skip = 'Missing Python Imaging Library, install https://pypi.python.org/pypi/Pillow'
else:
    encoders = {'jpeg_encoder', 'jpeg_decoder'}
    # issubset() accepts any iterable; iterating the module dict yields its
    # attribute names, so no intermediate set is needed.
    if not encoders.issubset(Image.core.__dict__):
        skip = 'Missing JPEG encoders'
|
2010-09-16 14:19:32 -03:00
|
|
|
|
2013-08-16 17:02:31 +02:00
|
|
|
|
2010-10-23 05:10:52 -02:00
|
|
|
def _mocked_download_func(request, info):
|
|
|
|
response = request.meta.get('response')
|
|
|
|
return response() if callable(response) else response
|
|
|
|
|
2008-10-16 14:42:51 +00:00
|
|
|
|
|
|
|
class ImagesPipelineTestCase(unittest.TestCase):
    """Exercise ImagesPipeline.file_path, thumb_path and convert_image.

    Each test runs against a pipeline whose store is a fresh temporary
    directory and whose downloader is ``_mocked_download_func``.
    """

    # Re-export the module-level flag: False, or the reason string computed
    # at import time when PIL / the JPEG codecs are unavailable.
    skip = skip

    def setUp(self):
        self.tempdir = mkdtemp()
        self.pipeline = ImagesPipeline(self.tempdir, download_func=_mocked_download_func)

    def tearDown(self):
        rmtree(self.tempdir)

    def test_file_path(self):
        """file_path() maps each request URL to the expected 'full/<hex>.jpg'."""
        file_path = self.pipeline.file_path
        self.assertEqual(file_path(Request("https://dev.mydeco.com/mydeco.gif")),
                         'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
        self.assertEqual(file_path(Request("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg")),
                         'full/0ffcd85d563bca45e2f90becd0ca737bc58a00b2.jpg')
        self.assertEqual(file_path(Request("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif")),
                         'full/b250e3a74fff2e4703e310048a5b13eba79379d2.jpg')
        self.assertEqual(file_path(Request("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg")),
                         'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg')
        self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532/")),
                         'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2.jpg')
        self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532")),
                         'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')
        # Passing response= and info= must yield the same path as the
        # request-only call above.
        self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532"),
                                   response=Response("http://www.dorma.co.uk/images/product_details/2532"),
                                   info=object()),
                         'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')

    def test_thumbnail_name(self):
        """thumb_path() places thumbnails under 'thumbs/<name>/<hex>.jpg'."""
        thumb_path = self.pipeline.thumb_path
        name = '50'
        self.assertEqual(thumb_path(Request("file:///tmp/foo.jpg"), name),
                         'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
        self.assertEqual(thumb_path(Request("file://foo.png"), name),
                         'thumbs/50/e55b765eba0ec7348e50a1df496040449071b96a.jpg')
        self.assertEqual(thumb_path(Request("file:///tmp/foo"), name),
                         'thumbs/50/0329ad83ebb8e93ea7c7906d46e9ed55f7349a50.jpg')
        self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name),
                         'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')
        # Passing response= and info= must yield the same path as the
        # request-only call above.
        self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name,
                                    response=Response("file:///tmp/some.name/foo"),
                                    info=object()),
                         'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')

    def test_convert_image(self):
        """convert_image() yields RGB output and ratio-preserving thumbnails."""
        SIZE = (100, 100)
        # straightforward case: RGB and JPEG
        COLOUR = (0, 127, 255)
        im = _create_image('JPEG', 'RGB', SIZE, COLOUR)
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        # 100 x 100 pixels, all of the single fill colour
        self.assertEqual(converted.getcolors(), [(10000, COLOUR)])

        # check that the thumbnail keeps the image ratio
        thumbnail, _ = self.pipeline.convert_image(converted, size=(10, 25))
        self.assertEqual(thumbnail.mode, 'RGB')
        self.assertEqual(thumbnail.size, (10, 10))

        # transparency case: RGBA and PNG
        # NOTE(review): the expected (205, 230, 255) looks like the fill
        # colour blended over white -- confirm against convert_image.
        COLOUR = (0, 127, 255, 50)
        im = _create_image('PNG', 'RGBA', SIZE, COLOUR)
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        self.assertEqual(converted.getcolors(), [(10000, (205, 230, 255))])
|
|
|
|
|
2013-12-17 23:57:22 +07:00
|
|
|
|
2013-12-18 22:44:04 +07:00
|
|
|
class DeprecatedImagesPipeline(ImagesPipeline):
    """ImagesPipeline subclass overriding the legacy ``*_key`` methods.

    The overrides return paths under 'empty/' and 'thumbsup/' so tests can
    tell them apart from the paths the base pipeline is expected to produce.
    """

    def file_key(self, url):
        # Delegates to image_key(), so both legacy names give the same path.
        return self.image_key(url)

    def image_key(self, url):
        # File name is the SHA-1 hex digest of the URL bytes.
        hasher = hashlib.sha1()
        hasher.update(to_bytes(url))
        return 'empty/%s.jpg' % (hasher.hexdigest(),)

    def thumb_key(self, url, thumb_id):
        # Same digest scheme as image_key(), namespaced by the thumbnail id.
        hasher = hashlib.sha1()
        hasher.update(to_bytes(url))
        return 'thumbsup/%s/%s.jpg' % (thumb_id, hasher.hexdigest())
|
|
|
|
|
|
|
|
|
|
|
|
class DeprecatedImagesPipelineTestCase(unittest.TestCase):
    """Check the legacy file_key / image_key / thumb_key code paths.

    Every test records warnings and asserts that exactly one was emitted and
    that its message contains the expected deprecation text.
    """

    def setUp(self):
        self.tempdir = mkdtemp()

    def init_pipeline(self, pipeline_class):
        """Build ``pipeline_class`` on the temp dir and open it with no spider."""
        self.pipeline = pipeline_class(self.tempdir, download_func=_mocked_download_func)
        self.pipeline.open_spider(None)

    def test_default_file_key_method(self):
        self.init_pipeline(ImagesPipeline)
        with warnings.catch_warnings(record=True) as w:
            # 'always' stops the warning registry from suppressing repeats.
            warnings.simplefilter('always')
            self.assertEqual(self.pipeline.file_key("https://dev.mydeco.com/mydeco.gif"),
                             'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
            self.assertEqual(len(w), 1)
            self.assertIn('image_key(url) and file_key(url) methods are deprecated', str(w[-1].message))

    def test_default_image_key_method(self):
        self.init_pipeline(ImagesPipeline)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            self.assertEqual(self.pipeline.image_key("https://dev.mydeco.com/mydeco.gif"),
                             'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
            self.assertEqual(len(w), 1)
            self.assertIn('image_key(url) and file_key(url) methods are deprecated', str(w[-1].message))

    def test_overridden_file_key_method(self):
        self.init_pipeline(DeprecatedImagesPipeline)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            # file_path() must honour the subclass override ('empty/' prefix).
            self.assertEqual(self.pipeline.file_path(Request("https://dev.mydeco.com/mydeco.gif")),
                             'empty/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
            self.assertEqual(len(w), 1)
            self.assertIn('image_key(url) and file_key(url) methods are deprecated', str(w[-1].message))

    def test_default_thumb_key_method(self):
        self.init_pipeline(ImagesPipeline)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            self.assertEqual(self.pipeline.thumb_key("file:///tmp/foo.jpg", 50),
                             'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
            self.assertEqual(len(w), 1)
            self.assertIn('thumb_key(url) method is deprecated', str(w[-1].message))

    def test_overridden_thumb_key_method(self):
        self.init_pipeline(DeprecatedImagesPipeline)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            # thumb_path() must honour the subclass override ('thumbsup/' prefix).
            self.assertEqual(self.pipeline.thumb_path(Request("file:///tmp/foo.jpg"), 50),
                             'thumbsup/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
            self.assertEqual(len(w), 1)
            self.assertIn('thumb_key(url) method is deprecated', str(w[-1].message))

    def tearDown(self):
        rmtree(self.tempdir)
|
|
|
|
|
|
|
|
|
2013-10-02 18:23:13 -02:00
|
|
|
class ImagesPipelineTestCaseFields(unittest.TestCase):
    """Check which item fields the pipeline reads URLs from and writes results to.

    Each scenario is run for both an ``Item`` subclass and a plain ``dict``.
    """

    def test_item_fields_default(self):
        """With only IMAGES_STORE set, 'image_urls' is read and 'images' written."""
        class TestItem(Item):
            name = Field()
            image_urls = Field()
            images = Field()

        for item_cls in (TestItem, dict):
            image_url = 'http://www.example.com/images/1.jpg'
            item = item_cls({'name': 'item1', 'image_urls': [image_url]})
            settings = Settings({'IMAGES_STORE': 's3://example/images/'})
            pipeline = ImagesPipeline.from_settings(settings)

            media_requests = list(pipeline.get_media_requests(item, None))
            self.assertEqual(media_requests[0].url, image_url)

            results = [(True, {'url': image_url})]
            pipeline.item_completed(results, item, None)
            stored_info = results[0][1]
            self.assertEqual(item['images'], [stored_info])

    def test_item_fields_override_settings(self):
        """IMAGES_URLS_FIELD / IMAGES_RESULT_FIELD redirect the fields used."""
        class TestItem(Item):
            name = Field()
            image = Field()
            stored_image = Field()

        for item_cls in (TestItem, dict):
            image_url = 'http://www.example.com/images/1.jpg'
            item = item_cls({'name': 'item1', 'image': [image_url]})
            settings = Settings({
                'IMAGES_STORE': 's3://example/images/',
                'IMAGES_URLS_FIELD': 'image',
                'IMAGES_RESULT_FIELD': 'stored_image'
            })
            pipeline = ImagesPipeline.from_settings(settings)

            media_requests = list(pipeline.get_media_requests(item, None))
            self.assertEqual(media_requests[0].url, image_url)

            results = [(True, {'url': image_url})]
            pipeline.item_completed(results, item, None)
            stored_info = results[0][1]
            self.assertEqual(item['stored_image'], [stored_info])
|
2013-12-17 23:57:22 +07:00
|
|
|
|
|
|
|
|
2016-03-31 19:19:49 -03:00
|
|
|
class ImagesPipelineTestCaseCustomSettings(unittest.TestCase):
    """Compare pipeline attributes against default and overridden settings.

    ``self.pipeline`` is built directly on the temp dir and is compared with
    the values in a default ``Settings()``; a second pipeline built through
    ``from_settings`` is compared with the value passed in.
    """

    def setUp(self):
        self.tempdir = mkdtemp()
        self.pipeline = ImagesPipeline(self.tempdir)
        self.default_settings = Settings()

    def tearDown(self):
        rmtree(self.tempdir)

    def _pipeline_with(self, setting_name, value):
        # Build a pipeline from settings holding the temp-dir store plus
        # exactly one overridden setting.
        overrides = {'IMAGES_STORE': self.tempdir, setting_name: value}
        return ImagesPipeline.from_settings(Settings(overrides))

    def test_expires(self):
        another_pipeline = self._pipeline_with('IMAGES_EXPIRES', 42)
        expected_default = self.default_settings.getint('IMAGES_EXPIRES')
        self.assertEqual(self.pipeline.expires, expected_default)
        self.assertEqual(another_pipeline.expires, 42)

    def test_images_urls_field(self):
        another_pipeline = self._pipeline_with('IMAGES_URLS_FIELD', 'funny_field')
        expected_default = self.default_settings.get('IMAGES_URLS_FIELD')
        self.assertEqual(self.pipeline.images_urls_field, expected_default)
        self.assertEqual(another_pipeline.images_urls_field, 'funny_field')

    def test_images_result_field(self):
        another_pipeline = self._pipeline_with('IMAGES_RESULT_FIELD', 'funny_field')
        expected_default = self.default_settings.get('IMAGES_RESULT_FIELD')
        self.assertEqual(self.pipeline.images_result_field, expected_default)
        self.assertEqual(another_pipeline.images_result_field, 'funny_field')

    def test_min_width(self):
        another_pipeline = self._pipeline_with('IMAGES_MIN_WIDTH', 42)
        expected_default = self.default_settings.getint('IMAGES_MIN_WIDTH')
        self.assertEqual(self.pipeline.min_width, expected_default)
        self.assertEqual(another_pipeline.min_width, 42)

    def test_min_height(self):
        another_pipeline = self._pipeline_with('IMAGES_MIN_HEIGHT', 42)
        expected_default = self.default_settings.getint('IMAGES_MIN_HEIGHT')
        self.assertEqual(self.pipeline.min_height, expected_default)
        self.assertEqual(another_pipeline.min_height, 42)

    def test_thumbs(self):
        custom_thumbs = {'small': (50, 50), 'big': (270, 270)}
        another_pipeline = self._pipeline_with('IMAGES_THUMBS', custom_thumbs)
        expected_default = self.default_settings.get('IMAGES_THUMBS')
        self.assertEqual(self.pipeline.thumbs, expected_default)
        self.assertEqual(another_pipeline.thumbs, custom_thumbs)
|
|
|
|
|
|
|
|
|
2010-09-16 14:19:32 -03:00
|
|
|
def _create_image(format, *a, **kw):
    """Build an image, save it in ``format`` and reopen it from the saved bytes.

    ``*a`` / ``**kw`` are forwarded unchanged to ``Image.new``. The image is
    written to an anonymous temporary file, the file is rewound, and the
    result of ``Image.open`` on that file is returned. The file is left open
    on purpose: the returned image is backed by it.
    """
    storage = TemporaryFile()
    fresh = Image.new(*a, **kw)
    fresh.save(storage, format)
    storage.seek(0)  # rewind so Image.open reads from the start
    return Image.open(storage)
|
2009-10-07 22:34:38 -02:00
|
|
|
|
|
|
|
|
2008-10-16 14:42:51 +00:00
|
|
|
# Allow running this module directly as a script.
# NOTE(review): ``unittest`` here is ``twisted.trial.unittest`` (see the
# imports), not the stdlib module -- confirm it actually exposes ``main``.
if __name__ == "__main__":
    unittest.main()
|