
Fix missing storage.store calls in FeedExporter.close_spider (#4626)

Stas Glubokiy 2020-06-17 18:08:14 +03:00 committed by GitHub
parent 89e900e013
commit 3d027fb578
2 changed files with 51 additions and 2 deletions

scrapy/extensions/feedexport.py

@@ -270,7 +270,9 @@ class FeedExporter:
             if not slot.itemcount and not slot.store_empty:
                 # We need to call slot.storage.store nonetheless to get the file
                 # properly closed.
-                return defer.maybeDeferred(slot.storage.store, slot.file)
+                d = defer.maybeDeferred(slot.storage.store, slot.file)
+                deferred_list.append(d)
+                continue
             slot.finish_exporting()
             logfmt = "%s %%(format)s feed (%%(itemcount)d items) in: %%(uri)s"
             log_args = {'format': slot.format,
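
The bug: `close_spider` iterates over all feed slots, but the old code returned from the method as soon as it hit the first empty slot, so `storage.store` was never called for the remaining feeds and their temporary files were left open. The fix queues the deferred and continues with the next slot. Below is a condensed sketch of how the fixed loop fits into `close_spider`; `deferred_list` and the final `DeferredList` aggregation come from the surrounding context, while the method's other bookkeeping (log messages, signals) is omitted, so treat this as an illustration rather than the verbatim Scrapy source:

    def close_spider(self, spider):
        deferred_list = []
        for slot in self.slots:
            if not slot.itemcount and not slot.store_empty:
                # Empty feed and FEED_STORE_EMPTY is off: still call
                # storage.store so the temporary file gets closed, then
                # move on to the next slot instead of returning early.
                d = defer.maybeDeferred(slot.storage.store, slot.file)
                deferred_list.append(d)
                continue
            slot.finish_exporting()
            d = defer.maybeDeferred(slot.storage.store, slot.file)
            deferred_list.append(d)
        # Close the spider only after every feed's store() has run.
        return defer.DeferredList(deferred_list) if deferred_list else None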

tests/test_feedexport.py

@@ -7,6 +7,7 @@ import string
 import tempfile
 import warnings
 from io import BytesIO
+from logging import getLogger
 from pathlib import Path
 from string import ascii_letters, digits
 from unittest import mock
@@ -14,9 +15,11 @@ from urllib.parse import urljoin, urlparse, quote
 from urllib.request import pathname2url
+
 import lxml.etree
+from testfixtures import LogCapture
 from twisted.internet import defer
 from twisted.trial import unittest
-from w3lib.url import path_to_file_uri
+from w3lib.url import file_uri_to_path, path_to_file_uri
 from zope.interface import implementer
 from zope.interface.verify import verifyObject

 import scrapy
@@ -390,6 +393,25 @@ class FromCrawlerFileFeedStorage(FileFeedStorage, FromCrawlerMixin):
     pass


+@implementer(IFeedStorage)
+class LogOnStoreFileStorage:
+    """
+    This storage logs inside `store` method.
+    It can be used to make sure `store` method is invoked.
+    """
+
+    def __init__(self, uri):
+        self.path = file_uri_to_path(uri)
+        self.logger = getLogger()
+
+    def open(self, spider):
+        return tempfile.NamedTemporaryFile(prefix='feed-')
+
+    def store(self, file):
+        self.logger.info('Storage.store is called')
+        file.close()
+
+
 class FeedExportTest(unittest.TestCase):

     class MyItem(scrapy.Item):
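
The helper implements the two methods Scrapy expects from any feed storage backend: `open()` hands the exporter a file to write into, and `store()` persists it (here it just logs and closes, which is all the test needs). A quick illustrative check that the helper really satisfies the storage interface, assuming `IFeedStorage` is importable from `scrapy.extensions.feedexport` and using a made-up URI:

    from zope.interface.verify import verifyObject

    from scrapy.extensions.feedexport import IFeedStorage

    # The URI below is hypothetical; any file:// URI would do.
    storage = LogOnStoreFileStorage('file:///tmp/feed-example.json')
    verifyObject(IFeedStorage, storage)  # raises if the interface is not met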
@@ -426,11 +448,17 @@ class FeedExportTest(unittest.TestCase):
                 yield runner.crawl(spider_cls)

             for file_path, feed in FEEDS.items():
+                if not os.path.exists(str(file_path)):
+                    continue
+
                 with open(str(file_path), 'rb') as f:
                     content[feed['format']] = f.read()

         finally:
             for file_path in FEEDS.keys():
+                if not os.path.exists(str(file_path)):
+                    continue
+
                 os.remove(str(file_path))

         return content
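
These guards are needed because with `'FEED_STORE_EMPTY': False` and zero scraped items, some of the configured output files are never created, so both the collection loop and the cleanup loop must tolerate missing paths. An equivalent way to write the cleanup, shown only as an alternative formulation (the commit keeps the explicit `os.path.exists` check; `remove_feed_files` is a hypothetical name):

    import contextlib
    import os

    def remove_feed_files(feeds):
        # Ignore output files that were never created instead of
        # checking for their existence up front.
        for file_path in feeds:
            with contextlib.suppress(FileNotFoundError):
                os.remove(str(file_path))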
@@ -623,6 +651,25 @@ class FeedExportTest(unittest.TestCase):
         data = yield self.exported_no_data(settings)
         self.assertEqual(data[fmt], expctd)

+    @defer.inlineCallbacks
+    def test_export_no_items_multiple_feeds(self):
+        """ Make sure that `storage.store` is called for every feed. """
+        settings = {
+            'FEEDS': {
+                self._random_temp_filename(): {'format': 'json'},
+                self._random_temp_filename(): {'format': 'xml'},
+                self._random_temp_filename(): {'format': 'csv'},
+            },
+            'FEED_STORAGES': {'file': 'tests.test_feedexport.LogOnStoreFileStorage'},
+            'FEED_STORE_EMPTY': False
+        }
+
+        with LogCapture() as log:
+            yield self.exported_no_data(settings)
+
+        print(log)
+        self.assertEqual(str(log).count('Storage.store is called'), 3)
+
     @defer.inlineCallbacks
     def test_export_multiple_item_classes(self):
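
The assertion works by counting occurrences of the storage's log message: with three feeds configured and `FEED_STORE_EMPTY` disabled, `store` must still run once per feed, which is exactly what the old early `return` broke (only the first feed's storage got the call). A minimal, self-contained illustration of the counting technique, with hypothetical values standing in for the real crawl:

    from logging import getLogger

    from testfixtures import LogCapture

    logger = getLogger()

    with LogCapture() as log:
        # Stand-in for the three feed slots; the real test drives a crawl.
        for _ in range(3):
            logger.info('Storage.store is called')

    # str(log) renders every captured record, so substring counting works.
    assert str(log).count('Storage.store is called') == 3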