import asyncio

import pytest
from twisted.internet import defer
from twisted.internet.defer import Deferred
from twisted.trial import unittest

from scrapy import Request, Spider, signals
from scrapy.utils.defer import deferred_to_future, maybe_deferred_to_future
from scrapy.utils.test import get_crawler, get_from_asyncio_queue
from tests.mockserver import MockServer
|
|
|
|
|
|
|
|
|
|
|
|
class SimplePipeline:
    """Plain synchronous pipeline: flags the item and passes it along."""

    def process_item(self, item, spider):
        # Mark the item so the test's item_scraped handler can verify
        # that it actually travelled through the pipeline.
        item["pipeline_passed"] = True
        return item
|
|
|
|
|
|
|
|
|
|
|
|
class DeferredPipeline:
    """Pipeline whose ``process_item`` returns an already-fired Deferred."""

    def cb(self, item):
        # Callback that performs the actual item mutation.
        item["pipeline_passed"] = True
        return item

    def process_item(self, item, spider):
        # Fire the Deferred synchronously; Scrapy still has to handle
        # the Deferred-returning code path.
        deferred = Deferred()
        deferred.addCallback(self.cb)
        deferred.callback(item)
        return deferred
|
|
|
|
|
|
|
|
|
2019-09-10 14:26:21 +05:00
|
|
|
class AsyncDefPipeline:
    """Coroutine pipeline awaiting a Deferred via ``maybe_deferred_to_future``."""

    async def process_item(self, item, spider):
        # Late import: the reactor must already be installed by the time
        # this runs, so importing at module level is avoided.
        from twisted.internet import reactor

        delay = Deferred()
        reactor.callLater(0, delay.callback, None)
        await maybe_deferred_to_future(delay)
        item["pipeline_passed"] = True
        return item
|
|
|
|
|
|
|
|
|
2019-09-10 14:57:07 +05:00
|
|
|
class AsyncDefAsyncioPipeline:
    """Coroutine pipeline that mixes Deferreds with asyncio-only awaitables.

    Exercised only under the asyncio reactor (see the ``only_asyncio`` mark
    on its test): ``deferred_to_future`` and the asyncio calls below require it.
    """

    async def process_item(self, item, spider):
        # Late import: the reactor must be installed before use.
        from twisted.internet import reactor

        trigger = Deferred()
        reactor.callLater(0, trigger.callback, None)
        await deferred_to_future(trigger)
        await asyncio.sleep(0.2)
        item["pipeline_passed"] = await get_from_asyncio_queue(True)
        return item
|
|
|
|
|
|
|
|
|
2021-10-22 21:46:01 +05:00
|
|
|
class AsyncDefNotAsyncioPipeline:
    """Coroutine pipeline using only awaitables that work on any reactor."""

    async def process_item(self, item, spider):
        # Late import: the reactor must be installed before use.
        from twisted.internet import reactor

        # A Deferred can be awaited directly, without asyncio.
        first = Deferred()
        reactor.callLater(0, first.callback, None)
        await first
        # maybe_deferred_to_future also works without the asyncio reactor.
        second = Deferred()
        reactor.callLater(0, second.callback, None)
        await maybe_deferred_to_future(second)
        item["pipeline_passed"] = True
        return item
|
|
|
|
|
|
|
|
|
2019-09-10 14:23:11 +05:00
|
|
|
class ItemSpider(Spider):
    """Spider issuing one request and producing a single plain-dict item.

    The pipeline under test is expected to add ``pipeline_passed`` to the
    item before it reaches the ``item_scraped`` signal handler.
    """

    name = "itemspider"

    def start_requests(self):
        # One request against the mock server; any 200 response suffices.
        # NOTE(review): self.mockserver is injected via crawl(mockserver=...).
        yield Request(self.mockserver.url("/status?n=200"))

    def parse(self, response):
        return {"field": 42}
|
|
|
|
|
|
|
|
|
2025-03-09 23:24:45 +04:00
|
|
|
class TestPipeline(unittest.TestCase):
    """Integration tests running each pipeline flavour through a real crawl.

    Each test installs exactly one pipeline class, crawls the mock server
    once, and checks that a single item came out with ``pipeline_passed``
    set. The shared crawl-and-assert body lives in ``_run_pipeline`` so the
    five tests differ only in the pipeline class they exercise.
    """

    @classmethod
    def setUpClass(cls):
        # One mock HTTP server shared by every test in the class.
        cls.mockserver = MockServer()
        cls.mockserver.__enter__()

    @classmethod
    def tearDownClass(cls):
        cls.mockserver.__exit__(None, None, None)

    def _on_item_scraped(self, item):
        # Every pipeline in this module must have set the flag by now.
        assert isinstance(item, dict)
        assert item.get("pipeline_passed")
        self.items.append(item)

    def _create_crawler(self, pipeline_class):
        """Build a crawler with *pipeline_class* installed and item capture wired up."""
        settings = {
            "ITEM_PIPELINES": {pipeline_class: 1},
        }
        crawler = get_crawler(ItemSpider, settings)
        crawler.signals.connect(self._on_item_scraped, signals.item_scraped)
        self.items = []
        return crawler

    @defer.inlineCallbacks
    def _run_pipeline(self, pipeline_class):
        """Crawl once with *pipeline_class* and assert exactly one item passed."""
        crawler = self._create_crawler(pipeline_class)
        yield crawler.crawl(mockserver=self.mockserver)
        assert len(self.items) == 1

    # trial accepts test methods that return a Deferred, so each test just
    # returns the helper's Deferred instead of repeating the crawl body.
    def test_simple_pipeline(self):
        return self._run_pipeline(SimplePipeline)

    def test_deferred_pipeline(self):
        return self._run_pipeline(DeferredPipeline)

    def test_asyncdef_pipeline(self):
        return self._run_pipeline(AsyncDefPipeline)

    @pytest.mark.only_asyncio
    def test_asyncdef_asyncio_pipeline(self):
        return self._run_pipeline(AsyncDefAsyncioPipeline)

    @pytest.mark.only_not_asyncio
    def test_asyncdef_not_asyncio_pipeline(self):
        return self._run_pipeline(AsyncDefNotAsyncioPipeline)
|