1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-06 10:24:24 +00:00
scrapy/tests/test_engine_stop_download_bytes.py
2025-02-01 16:07:55 +05:00

75 lines
2.9 KiB
Python

from testfixtures import LogCapture
from twisted.internet import defer
from scrapy.exceptions import StopDownload
from tests.test_engine import (
AttrsItemsSpider,
CrawlerRun,
DataClassItemsSpider,
DictItemsSpider,
EngineTestBase,
TestSpider,
)
class BytesReceivedCrawlerRun(CrawlerRun):
def bytes_received(self, data, request, spider):
super().bytes_received(data, request, spider)
raise StopDownload(fail=False)
class BytesReceivedEngineTest(EngineTestBase):
@defer.inlineCallbacks
def test_crawler(self):
for spider in (
TestSpider,
DictItemsSpider,
AttrsItemsSpider,
DataClassItemsSpider,
):
run = BytesReceivedCrawlerRun(spider)
with LogCapture() as log:
yield run.run()
log.check_present(
(
"scrapy.core.downloader.handlers.http11",
"DEBUG",
f"Download stopped for <GET http://localhost:{run.portno}/redirected> "
"from signal handler BytesReceivedCrawlerRun.bytes_received",
)
)
log.check_present(
(
"scrapy.core.downloader.handlers.http11",
"DEBUG",
f"Download stopped for <GET http://localhost:{run.portno}/> "
"from signal handler BytesReceivedCrawlerRun.bytes_received",
)
)
log.check_present(
(
"scrapy.core.downloader.handlers.http11",
"DEBUG",
f"Download stopped for <GET http://localhost:{run.portno}/numbers> "
"from signal handler BytesReceivedCrawlerRun.bytes_received",
)
)
self._assert_visited_urls(run)
self._assert_scheduled_requests(run, count=9)
self._assert_downloaded_responses(run, count=9)
self._assert_signals_caught(run)
self._assert_headers_received(run)
self._assert_bytes_received(run)
def _assert_bytes_received(self, run: CrawlerRun):
self.assertEqual(9, len(run.bytes))
for request, data in run.bytes.items():
joined_data = b"".join(data)
self.assertTrue(len(data) == 1) # signal was fired only once
if run.getpath(request.url) == "/numbers":
# Received bytes are not the complete response. The exact amount depends
# on the buffer size, which can vary, so we only check that the amount
# of received bytes is strictly less than the full response.
numbers = [str(x).encode("utf8") for x in range(2**18)]
self.assertTrue(len(joined_data) < len(b"".join(numbers)))