1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 17:03:40 +00:00

Merge pull request #490 from max-arnold/master

add new pipeline methods to get file/image/thumbnail paths
This commit is contained in:
Daniel Graña 2013-12-23 15:11:53 -08:00
commit 2c2ce20878
4 changed files with 286 additions and 74 deletions

View File

@ -33,14 +33,14 @@ class FSFilesStore(object):
self._mkdir(self.basedir)
self.created_directories = defaultdict(set)
def persist_file(self, key, buf, info, meta=None, headers=None):
absolute_path = self._get_filesystem_path(key)
def persist_file(self, path, buf, info, meta=None, headers=None):
absolute_path = self._get_filesystem_path(path)
self._mkdir(os.path.dirname(absolute_path), info)
with open(absolute_path, 'wb') as f:
f.write(buf.getvalue())
def stat_file(self, key, info):
absolute_path = self._get_filesystem_path(key)
def stat_file(self, path, info):
absolute_path = self._get_filesystem_path(path)
try:
last_modified = os.path.getmtime(absolute_path)
except: # FIXME: catching everything!
@ -51,8 +51,8 @@ class FSFilesStore(object):
return {'last_modified': last_modified, 'checksum': checksum}
def _get_filesystem_path(self, key):
path_comps = key.split('/')
def _get_filesystem_path(self, path):
path_comps = path.split('/')
return os.path.join(self.basedir, *path_comps)
def _mkdir(self, dirname, domain=None):
@ -77,7 +77,7 @@ class S3FilesStore(object):
assert uri.startswith('s3://')
self.bucket, self.prefix = uri[5:].split('/', 1)
def stat_file(self, key, info):
def stat_file(self, path, info):
def _onsuccess(boto_key):
checksum = boto_key.etag.strip('"')
last_modified = boto_key.last_modified
@ -85,7 +85,7 @@ class S3FilesStore(object):
modified_stamp = int(rfc822.mktime_tz(modified_tuple))
return {'checksum': checksum, 'last_modified': modified_stamp}
return self._get_boto_key(key).addCallback(_onsuccess)
return self._get_boto_key(path).addCallback(_onsuccess)
def _get_boto_bucket(self):
from boto.s3.connection import S3Connection
@ -94,15 +94,15 @@ class S3FilesStore(object):
c = S3Connection(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY, is_secure=False)
return c.get_bucket(self.bucket, validate=False)
def _get_boto_key(self, key):
def _get_boto_key(self, path):
b = self._get_boto_bucket()
key_name = '%s%s' % (self.prefix, key)
key_name = '%s%s' % (self.prefix, path)
return threads.deferToThread(b.get_key, key_name)
def persist_file(self, key, buf, info, meta=None, headers=None):
def persist_file(self, path, buf, info, meta=None, headers=None):
"""Upload file to S3 storage"""
b = self._get_boto_bucket()
key_name = '%s%s' % (self.prefix, key)
key_name = '%s%s' % (self.prefix, path)
k = b.new_key(key_name)
if meta:
for metakey, metavalue in meta.iteritems():
@ -191,10 +191,10 @@ class FilesPipeline(MediaPipeline):
self.inc_stats(info.spider, 'uptodate')
checksum = result.get('checksum', None)
return {'url': request.url, 'path': key, 'checksum': checksum}
return {'url': request.url, 'path': path, 'checksum': checksum}
key = self.file_key(request.url)
dfd = defer.maybeDeferred(self.store.stat_file, key, info)
path = self.file_path(request, info=info)
dfd = defer.maybeDeferred(self.store.stat_file, path, info)
dfd.addCallbacks(_onsuccess, lambda _: None)
dfd.addErrback(log.err, self.__class__.__name__ + '.store.stat_file')
return dfd
@ -232,7 +232,7 @@ class FilesPipeline(MediaPipeline):
self.inc_stats(info.spider, status)
try:
key = self.file_key(request.url)
path = self.file_path(request, response=response, info=info)
checksum = self.file_downloaded(response, request, info)
except FileException as exc:
whyfmt = 'File (error): Error processing image from %(request)s referred in <%(referer)s>: %(errormsg)s'
@ -244,7 +244,7 @@ class FilesPipeline(MediaPipeline):
log.err(None, whyfmt % {'request': request, 'referer': referer}, spider=info.spider)
raise FileException(str(exc))
return {'url': request.url, 'path': key, 'checksum': checksum}
return {'url': request.url, 'path': path, 'checksum': checksum}
def inc_stats(self, spider, status):
spider.crawler.stats.inc_value('file_count', spider=spider)
@ -254,15 +254,10 @@ class FilesPipeline(MediaPipeline):
def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.FILES_URLS_FIELD, [])]
def file_key(self, url):
media_guid = hashlib.sha1(url).hexdigest()
media_ext = os.path.splitext(url)[1]
return 'full/%s%s' % (media_guid, media_ext)
def file_downloaded(self, response, request, info):
key = self.file_key(request.url)
path = self.file_path(request, response=response, info=info)
buf = StringIO(response.body)
self.store.persist_file(key, buf, info)
self.store.persist_file(path, buf, info)
checksum = md5sum(buf)
return checksum
@ -270,3 +265,34 @@ class FilesPipeline(MediaPipeline):
if self.FILES_RESULT_FIELD in item.fields:
item[self.FILES_RESULT_FIELD] = [x for ok, x in results if ok]
return item
def file_path(self, request, response=None, info=None):
    """Return the storage path, relative to the store root, for a download.

    The default path is ``full/<sha1 of the URL><extension of the URL>``.

    ``request`` is normally a ``Request``. For backwards compatibility it
    may also be a bare URL string, which is what the deprecated
    ``file_key(url)`` shim passes in; a deprecation warning is emitted in
    that case and the default path is computed.

    When called with a ``Request`` on a subclass that overrides
    ``file_key()``, the call is routed to that override (with a
    deprecation warning) so existing customisations keep working.

    ``response`` and ``info`` are not read by this implementation.
    """
    ## start of deprecation warning block (can be removed in the future)
    def _warn():
        from scrapy.exceptions import ScrapyDeprecationWarning
        import warnings
        warnings.warn('FilesPipeline.file_key(url) method is deprecated, please use '
                      'file_path(request, response=None, info=None) instead',
                      category=ScrapyDeprecationWarning, stacklevel=1)

    if isinstance(request, Request):
        url = request.url
        # detect if file_key() method has been overridden
        if not hasattr(self.file_key, '_base'):
            _warn()
            return self.file_key(url)
    else:
        # called from file_key with url as first argument.
        # Do NOT dispatch to an overridden file_key() from here: an override
        # that delegates to the base file_key() re-enters this method with
        # the bare URL, and bouncing back to the override would recurse
        # without end. Warn once and fall through to the default path.
        _warn()
        url = request
    ## end of deprecation warning block

    media_guid = hashlib.sha1(url).hexdigest()  # change to request.url after deprecation
    media_ext = os.path.splitext(url)[1]  # change to request.url after deprecation
    return 'full/%s%s' % (media_guid, media_ext)
# deprecated
def file_key(self, url):
    """Deprecated entry point: forward ``url`` to ``file_path()``."""
    path = self.file_path(url)
    return path
# Marker looked up with hasattr() by file_path() to tell this stock
# implementation apart from a subclass override.
file_key._base = True

View File

@ -51,27 +51,24 @@ class ImagesPipeline(FilesPipeline):
store_uri = settings['IMAGES_STORE']
return cls(store_uri)
def file_key(self, url):
return self.image_key(url)
def file_downloaded(self, response, request, info):
return self.image_downloaded(response, request, info)
def image_downloaded(self, response, request, info):
checksum = None
for key, image, buf in self.get_images(response, request, info):
for path, image, buf in self.get_images(response, request, info):
if checksum is None:
buf.seek(0)
checksum = md5sum(buf)
width, height = image.size
self.store.persist_file(
key, buf, info,
path, buf, info,
meta={'width': width, 'height': height},
headers={'Content-Type': 'image/jpeg'})
return checksum
def get_images(self, response, request, info):
key = self.file_key(request.url)
path = self.file_path(request, response=response, info=info)
orig_image = Image.open(StringIO(response.body))
width, height = orig_image.size
@ -80,12 +77,12 @@ class ImagesPipeline(FilesPipeline):
(width, height, self.MIN_WIDTH, self.MIN_HEIGHT))
image, buf = self.convert_image(orig_image)
yield key, image, buf
yield path, image, buf
for thumb_id, size in self.THUMBS.iteritems():
thumb_key = self.thumb_key(request.url, thumb_id)
thumb_path = self.thumb_path(request, thumb_id, response=response, info=info)
thumb_image, thumb_buf = self.convert_image(image, size)
yield thumb_key, thumb_image, thumb_buf
yield thumb_path, thumb_image, thumb_buf
def convert_image(self, image, size=None):
if image.format == 'PNG' and image.mode == 'RGBA':
@ -103,19 +100,78 @@ class ImagesPipeline(FilesPipeline):
image.save(buf, 'JPEG')
return image, buf
def thumb_key(self, url, thumb_id):
image_guid = hashlib.sha1(url).hexdigest()
return 'thumbs/%s/%s.jpg' % (thumb_id, image_guid)
def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.IMAGES_URLS_FIELD, [])]
# backwards compatibility
def image_key(self, url):
media_guid = hashlib.sha1(url).hexdigest()
return 'full/%s.jpg' % (media_guid)
def item_completed(self, results, item, info):
if self.IMAGES_RESULT_FIELD in item.fields:
item[self.IMAGES_RESULT_FIELD] = [x for ok, x in results if ok]
return item
def file_path(self, request, response=None, info=None):
    """Return the storage path, relative to the store root, for a full image.

    The default path is ``full/<sha1 of the URL>.jpg``.

    ``request`` is normally a ``Request``. For backwards compatibility it
    may also be a bare URL string, which is what the deprecated
    ``image_key(url)`` shim passes in; a deprecation warning is emitted in
    that case and the default path is computed.

    When called with a ``Request`` on a subclass that overrides
    ``file_key()`` or ``image_key()``, the call is routed to that override
    (``file_key()`` is checked first) with a deprecation warning.

    ``response`` and ``info`` are not read by this implementation.
    """
    ## start of deprecation warning block (can be removed in the future)
    def _warn():
        from scrapy.exceptions import ScrapyDeprecationWarning
        import warnings
        warnings.warn('ImagesPipeline.image_key(url) and file_key(url) methods are deprecated, '
                      'please use file_path(request, response=None, info=None) instead',
                      category=ScrapyDeprecationWarning, stacklevel=1)

    if isinstance(request, Request):
        url = request.url
        # detect if file_key() or image_key() methods have been overridden
        if not hasattr(self.file_key, '_base'):
            _warn()
            return self.file_key(url)
        elif not hasattr(self.image_key, '_base'):
            _warn()
            return self.image_key(url)
    else:
        # called from image_key or file_key with url as first argument.
        # Do NOT dispatch to an overridden key method from here: an override
        # that delegates to the base implementation re-enters this method
        # with the bare URL, and bouncing back to the override would recurse
        # without end. Warn once and fall through to the default path.
        _warn()
        url = request
    ## end of deprecation warning block

    image_guid = hashlib.sha1(url).hexdigest()  # change to request.url after deprecation
    return 'full/%s.jpg' % (image_guid)
def thumb_path(self, request, thumb_id, response=None, info=None):
    """Return the storage path, relative to the store root, for a thumbnail.

    The default path is ``thumbs/<thumb_id>/<sha1 of the URL>.jpg``.

    ``request`` is normally a ``Request``. For backwards compatibility it
    may also be a bare URL string, which is what the deprecated
    ``thumb_key(url, thumb_id)`` shim passes in; a deprecation warning is
    emitted in that case and the default path is computed.

    When called with a ``Request`` on a subclass that overrides
    ``thumb_key()``, the call is routed to that override with a
    deprecation warning.

    ``response`` and ``info`` are not read by this implementation.
    """
    ## start of deprecation warning block (can be removed in the future)
    def _warn():
        from scrapy.exceptions import ScrapyDeprecationWarning
        import warnings
        warnings.warn('ImagesPipeline.thumb_key(url) method is deprecated, please use '
                      'thumb_path(request, thumb_id, response=None, info=None) instead',
                      category=ScrapyDeprecationWarning, stacklevel=1)

    if isinstance(request, Request):
        url = request.url
        # detect if thumb_key() method has been overridden
        if not hasattr(self.thumb_key, '_base'):
            _warn()
            return self.thumb_key(url, thumb_id)
    else:
        # called from thumb_key with url as first argument.
        # Do NOT dispatch to an overridden thumb_key() from here: an override
        # that delegates to the base thumb_key() re-enters this method with
        # the bare URL, and bouncing back to the override would recurse
        # without end. Warn once and fall through to the default path.
        _warn()
        url = request
    ## end of deprecation warning block

    thumb_guid = hashlib.sha1(url).hexdigest()  # change to request.url after deprecation
    return 'thumbs/%s/%s.jpg' % (thumb_id, thumb_guid)
# deprecated
def file_key(self, url):
    """Deprecated entry point: forward ``url`` to ``image_key()``."""
    key = self.image_key(url)
    return key
# Marker looked up with hasattr() by file_path() to tell this stock
# implementation apart from a subclass override.
file_key._base = True
# deprecated
def image_key(self, url):
    """Deprecated entry point: forward ``url`` to ``file_path()``."""
    path = self.file_path(url)
    return path
# Marker looked up with hasattr() by file_path() to tell this stock
# implementation apart from a subclass override.
image_key._base = True
# deprecated
def thumb_key(self, url, thumb_id):
    """Deprecated entry point: forward ``url`` and ``thumb_id`` to ``thumb_path()``."""
    path = self.thumb_path(url, thumb_id)
    return path
# Marker looked up with hasattr() by thumb_path() to tell this stock
# implementation apart from a subclass override.
thumb_key._base = True

View File

@ -1,6 +1,8 @@
import mock
import os
import time
import hashlib
import warnings
from tempfile import mkdtemp
from shutil import rmtree
@ -30,27 +32,31 @@ class FilesPipelineTestCase(unittest.TestCase):
rmtree(self.tempdir)
def test_file_path(self):
image_path = self.pipeline.file_key
self.assertEqual(image_path("https://dev.mydeco.com/mydeco.pdf"),
file_path = self.pipeline.file_path
self.assertEqual(file_path(Request("https://dev.mydeco.com/mydeco.pdf")),
'full/c9b564df929f4bc635bdd19fde4f3d4847c757c5.pdf')
self.assertEqual(image_path("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.txt"),
self.assertEqual(file_path(Request("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.txt")),
'full/4ce274dd83db0368bafd7e406f382ae088e39219.txt')
self.assertEqual(image_path("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.doc"),
self.assertEqual(file_path(Request("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.doc")),
'full/94ccc495a17b9ac5d40e3eabf3afcb8c2c9b9e1a.doc')
self.assertEqual(image_path("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg"),
self.assertEqual(file_path(Request("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg")),
'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg')
self.assertEqual(image_path("http://www.dorma.co.uk/images/product_details/2532/"),
self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532/")),
'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2')
self.assertEqual(image_path("http://www.dorma.co.uk/images/product_details/2532"),
self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532")),
'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1')
self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532"),
response=Response("http://www.dorma.co.uk/images/product_details/2532"),
info=object()),
'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1')
def test_fs_store(self):
assert isinstance(self.pipeline.store, FSFilesStore)
self.assertEqual(self.pipeline.store.basedir, self.tempdir)
key = 'some/image/key.jpg'
path = os.path.join(self.tempdir, 'some', 'image', 'key.jpg')
self.assertEqual(self.pipeline.store._get_filesystem_path(key), path)
path = 'some/image/key.jpg'
fullpath = os.path.join(self.tempdir, 'some', 'image', 'key.jpg')
self.assertEqual(self.pipeline.store._get_filesystem_path(path), fullpath)
@defer.inlineCallbacks
def test_file_not_expired(self):
@ -93,6 +99,45 @@ class FilesPipelineTestCase(unittest.TestCase):
for p in patchers:
p.stop()
class DeprecatedFilesPipeline(FilesPipeline):
    """Fixture: a FilesPipeline subclass that overrides the deprecated file_key()."""

    def file_key(self, url):
        """Return ``empty/<sha1 of url><extension of url>``."""
        extension = os.path.splitext(url)[1]
        digest = hashlib.sha1(url).hexdigest()
        return 'empty/%s%s' % (digest, extension)
class DeprecatedFilesPipelineTestCase(unittest.TestCase):
    """Exercise the deprecation bridge between file_key() and file_path()."""

    def setUp(self):
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def init_pipeline(self, pipeline_class):
        """Build ``pipeline_class`` with FILES_STORE set to the temp dir and open it with no spider."""
        settings = Settings({'FILES_STORE': self.tempdir})
        self.pipeline = pipeline_class.from_settings(settings)
        self.pipeline.download_func = _mocked_download_func
        self.pipeline.open_spider(None)

    def test_default_file_key_method(self):
        # Calling the stock file_key() yields the default path plus one warning.
        self.init_pipeline(FilesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.file_key("https://dev.mydeco.com/mydeco.pdf")
            self.assertEqual(result,
                             'full/c9b564df929f4bc635bdd19fde4f3d4847c757c5.pdf')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('file_key(url) method is deprecated' in last_message)

    def test_overridden_file_key_method(self):
        # file_path(Request) on a subclass overriding file_key() uses the
        # override's result and warns once.
        self.init_pipeline(DeprecatedFilesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.file_path(Request("https://dev.mydeco.com/mydeco.pdf"))
            self.assertEqual(result,
                             'empty/c9b564df929f4bc635bdd19fde4f3d4847c757c5.pdf')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('file_key(url) method is deprecated' in last_message)
class FilesPipelineTestCaseFields(unittest.TestCase):
def test_item_fields_default(self):
@ -109,7 +154,7 @@ class FilesPipelineTestCaseFields(unittest.TestCase):
results = [(True, {'url': url})]
pipeline.item_completed(results, item, None)
self.assertEqual(item['files'], [results[0][1]])
def test_item_fields_override_settings(self):
from scrapy.contrib.pipeline.files import FilesPipeline
class TestItem(Item):
@ -125,7 +170,8 @@ class FilesPipelineTestCaseFields(unittest.TestCase):
results = [(True, {'url': url})]
pipeline.item_completed(results, item, None)
self.assertEqual(item['stored_file'], [results[0][1]])
class ItemWithFiles(Item):
file_urls = Field()
files = Field()
@ -142,5 +188,6 @@ def _prepare_request_object(item_url):
item_url,
meta={'response': Response(item_url, status=200, body='data')})
if __name__ == "__main__":
unittest.main()

View File

@ -1,4 +1,6 @@
import os
import hashlib
import warnings
from cStringIO import StringIO
from tempfile import mkdtemp
from shutil import rmtree
@ -6,7 +8,9 @@ from shutil import rmtree
from twisted.trial import unittest
from scrapy.item import Item, Field
from scrapy.http import Request, Response
from scrapy.settings import Settings
from scrapy.contrib.pipeline.images import ImagesPipeline
skip = False
try:
@ -29,39 +33,46 @@ class ImagesPipelineTestCase(unittest.TestCase):
skip = skip
def setUp(self):
from scrapy.contrib.pipeline.images import ImagesPipeline
self.tempdir = mkdtemp()
self.pipeline = ImagesPipeline(self.tempdir, download_func=_mocked_download_func)
def tearDown(self):
rmtree(self.tempdir)
def test_image_path(self):
image_path = self.pipeline.file_key
self.assertEqual(image_path("https://dev.mydeco.com/mydeco.gif"),
def test_file_path(self):
file_path = self.pipeline.file_path
self.assertEqual(file_path(Request("https://dev.mydeco.com/mydeco.gif")),
'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
self.assertEqual(image_path("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg"),
self.assertEqual(file_path(Request("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg")),
'full/0ffcd85d563bca45e2f90becd0ca737bc58a00b2.jpg')
self.assertEqual(image_path("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif"),
self.assertEqual(file_path(Request("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif")),
'full/b250e3a74fff2e4703e310048a5b13eba79379d2.jpg')
self.assertEqual(image_path("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg"),
self.assertEqual(file_path(Request("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg")),
'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg')
self.assertEqual(image_path("http://www.dorma.co.uk/images/product_details/2532/"),
self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532/")),
'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2.jpg')
self.assertEqual(image_path("http://www.dorma.co.uk/images/product_details/2532"),
self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532")),
'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')
self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532"),
response=Response("http://www.dorma.co.uk/images/product_details/2532"),
info=object()),
'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')
def test_thumbnail_name(self):
thumbnail_name = self.pipeline.thumb_key
thumb_path = self.pipeline.thumb_path
name = '50'
self.assertEqual(thumbnail_name("/tmp/foo.jpg", name),
'thumbs/50/271f172bb4727281011c80fe763e93a47bb6b3fe.jpg')
self.assertEqual(thumbnail_name("foo.png", name),
'thumbs/50/0945c699b5580b99e4f40dffc009699b2b6830a7.jpg')
self.assertEqual(thumbnail_name("/tmp/foo", name),
'thumbs/50/469150566bd728fc90b4adf6495202fd70ec3537.jpg')
self.assertEqual(thumbnail_name("/tmp/some.name/foo", name),
'thumbs/50/92dac2a6a2072c5695a5dff1f865b3cb70c657bb.jpg')
self.assertEqual(thumb_path(Request("file:///tmp/foo.jpg"), name),
'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
self.assertEqual(thumb_path(Request("file://foo.png"), name),
'thumbs/50/e55b765eba0ec7348e50a1df496040449071b96a.jpg')
self.assertEqual(thumb_path(Request("file:///tmp/foo"), name),
'thumbs/50/0329ad83ebb8e93ea7c7906d46e9ed55f7349a50.jpg')
self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name),
'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')
self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name,
response=Response("file:///tmp/some.name/foo"),
info=object()),
'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')
def test_convert_image(self):
SIZE = (100, 100)
@ -84,6 +95,77 @@ class ImagesPipelineTestCase(unittest.TestCase):
self.assertEquals(converted.mode, 'RGB')
self.assertEquals(converted.getcolors(), [(10000, (205, 230, 255))])
class DeprecatedImagesPipeline(ImagesPipeline):
    """Fixture: an ImagesPipeline subclass that overrides all three deprecated key methods."""

    def file_key(self, url):
        """Delegate to this class's image_key()."""
        return self.image_key(url)

    def image_key(self, url):
        """Return ``empty/<sha1 of url>.jpg``."""
        digest = hashlib.sha1(url).hexdigest()
        return 'empty/%s.jpg' % (digest,)

    def thumb_key(self, url, thumb_id):
        """Return ``thumbsup/<thumb_id>/<sha1 of url>.jpg``."""
        digest = hashlib.sha1(url).hexdigest()
        return 'thumbsup/%s/%s.jpg' % (thumb_id, digest)
class DeprecatedImagesPipelineTestCase(unittest.TestCase):
    """Exercise the deprecation bridge between the *_key() and *_path() methods."""

    def setUp(self):
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def init_pipeline(self, pipeline_class):
        """Build ``pipeline_class`` on the temp dir store and open it with no spider."""
        self.pipeline = pipeline_class(self.tempdir, download_func=_mocked_download_func)
        self.pipeline.open_spider(None)

    def test_default_file_key_method(self):
        # Stock file_key() yields the default image path plus one warning.
        self.init_pipeline(ImagesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.file_key("https://dev.mydeco.com/mydeco.gif")
            self.assertEqual(result,
                             'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('image_key(url) and file_key(url) methods are deprecated' in last_message)

    def test_default_image_key_method(self):
        # Stock image_key() yields the default image path plus one warning.
        self.init_pipeline(ImagesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.image_key("https://dev.mydeco.com/mydeco.gif")
            self.assertEqual(result,
                             'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('image_key(url) and file_key(url) methods are deprecated' in last_message)

    def test_overridden_file_key_method(self):
        # file_path(Request) on the overriding subclass uses the override's
        # result and warns once.
        self.init_pipeline(DeprecatedImagesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.file_path(Request("https://dev.mydeco.com/mydeco.gif"))
            self.assertEqual(result,
                             'empty/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('image_key(url) and file_key(url) methods are deprecated' in last_message)

    def test_default_thumb_key_method(self):
        # Stock thumb_key() yields the default thumbnail path plus one warning.
        self.init_pipeline(ImagesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.thumb_key("file:///tmp/foo.jpg", 50)
            self.assertEqual(result,
                             'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('thumb_key(url) method is deprecated' in last_message)

    def test_overridden_thumb_key_method(self):
        # thumb_path(Request, id) on the overriding subclass uses the
        # override's result and warns once.
        self.init_pipeline(DeprecatedImagesPipeline)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            result = self.pipeline.thumb_path(Request("file:///tmp/foo.jpg"), 50)
            self.assertEqual(result,
                             'thumbsup/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
            self.assertEqual(len(caught), 1)
            last_message = str(caught[-1].message)
            self.assertTrue('thumb_key(url) method is deprecated' in last_message)
class ImagesPipelineTestCaseFields(unittest.TestCase):
def test_item_fields_default(self):
@ -100,7 +182,7 @@ class ImagesPipelineTestCaseFields(unittest.TestCase):
results = [(True, {'url': url})]
pipeline.item_completed(results, item, None)
self.assertEqual(item['images'], [results[0][1]])
def test_item_fields_override_settings(self):
from scrapy.contrib.pipeline.images import ImagesPipeline
class TestItem(Item):
@ -116,7 +198,8 @@ class ImagesPipelineTestCaseFields(unittest.TestCase):
results = [(True, {'url': url})]
pipeline.item_completed(results, item, None)
self.assertEqual(item['stored_image'], [results[0][1]])
def _create_image(format, *a, **kw):
buf = StringIO()
Image.new(*a, **kw).save(buf, format)