# Source: scrapy/tests/test_pipelines.py
# Mirror of https://github.com/scrapy/scrapy.git (synced 2025-03-13 03:31:19 +00:00).
# Exported from a git-blame view (revisions listed in .git-blame-ignore-revs
# are ignored): 134 lines, 3.8 KiB, Python.

import asyncio
import pytest
2019-09-10 14:23:11 +05:00
from twisted.internet import defer
from twisted.internet.defer import Deferred
from twisted.trial import unittest
2023-01-25 14:25:15 -06:00
from scrapy import Request, Spider, signals
from scrapy.utils.defer import deferred_to_future, maybe_deferred_to_future
from scrapy.utils.test import get_crawler, get_from_asyncio_queue
2019-09-10 14:23:11 +05:00
from tests.mockserver import MockServer
class SimplePipeline:
    """Synchronous item pipeline that marks every item as processed."""

    def process_item(self, item, spider):
        """Flag *item* with ``pipeline_passed`` and return the same object.

        ``spider`` is accepted as part of the pipeline signature but is not
        used.
        """
        item["pipeline_passed"] = True
        return item
class DeferredPipeline:
    """Item pipeline whose ``process_item`` returns a Twisted ``Deferred``."""

    def cb(self, item):
        """Deferred callback: flag *item* as processed and pass it on."""
        item["pipeline_passed"] = True
        return item

    def process_item(self, item, spider):
        """Return a Deferred that is fired with *item* after ``cb`` is chained.

        The Deferred is fired before it is returned, with ``cb`` already
        attached as its callback.
        """
        d = Deferred()
        d.addCallback(self.cb)
        d.callback(item)
        return d
2019-09-10 14:26:21 +05:00
class AsyncDefPipeline:
    """Item pipeline implemented as a coroutine that awaits a Deferred."""

    async def process_item(self, item, spider):
        """Wait for a reactor-scheduled Deferred, then flag and return *item*."""
        d = Deferred()
        # The reactor is imported inside the coroutine, not at module level.
        # NOTE(review): presumably so that importing this module does not
        # install a reactor as a side effect -- confirm.
        from twisted.internet import reactor

        # Fire the Deferred from a reactor call scheduled with a zero delay.
        reactor.callLater(0, d.callback, None)
        await maybe_deferred_to_future(d)
        item["pipeline_passed"] = True
        return item
class AsyncDefAsyncioPipeline:
    """Coroutine pipeline that mixes a Deferred with asyncio awaitables."""

    async def process_item(self, item, spider):
        """Await a Deferred, an asyncio sleep and a queue helper, then return *item*.

        ``pipeline_passed`` is set to whatever ``get_from_asyncio_queue(True)``
        resolves to.
        """
        d = Deferred()
        # The reactor is imported inside the coroutine, not at module level.
        # NOTE(review): presumably so that importing this module does not
        # install a reactor as a side effect -- confirm.
        from twisted.internet import reactor

        # Fire the Deferred from a reactor call scheduled with a zero delay.
        reactor.callLater(0, d.callback, None)
        await deferred_to_future(d)
        # Plain asyncio awaitable awaited directly in the pipeline.
        await asyncio.sleep(0.2)
        item["pipeline_passed"] = await get_from_asyncio_queue(True)
        return item
2021-10-22 21:46:01 +05:00
class AsyncDefNotAsyncioPipeline:
    """Coroutine pipeline that awaits a Deferred directly."""

    async def process_item(self, item, spider):
        """Await two reactor-fired Deferreds in turn, then flag and return *item*.

        The first Deferred is awaited as-is; the second goes through
        ``maybe_deferred_to_future``.
        """
        d1 = Deferred()
        # The reactor is imported inside the coroutine, not at module level.
        # NOTE(review): presumably so that importing this module does not
        # install a reactor as a side effect -- confirm.
        from twisted.internet import reactor

        reactor.callLater(0, d1.callback, None)
        # Awaiting the bare Deferred, without converting it to a future.
        await d1

        d2 = Deferred()
        reactor.callLater(0, d2.callback, None)
        await maybe_deferred_to_future(d2)

        item["pipeline_passed"] = True
        return item
2019-09-10 14:23:11 +05:00
class ItemSpider(Spider):
    """Spider that requests one mock-server URL and yields one dict item."""

    name = "itemspider"

    def start_requests(self):
        """Yield a single request to the mock server's ``/status?n=200`` URL.

        Reads ``self.mockserver``, which this class does not set itself; the
        tests in this file pass ``mockserver=...`` to ``crawler.crawl()``.
        """
        yield Request(self.mockserver.url("/status?n=200"))

    def parse(self, response):
        """Return a fixed item regardless of the response contents."""
        return {"field": 42}
class TestPipeline(unittest.TestCase):
    """Crawl with each pipeline flavour and check that one flagged item is scraped."""

    @classmethod
    def setUpClass(cls):
        """Start one mock server shared by every test in the class."""
        cls.mockserver = MockServer()
        cls.mockserver.__enter__()

    @classmethod
    def tearDownClass(cls):
        """Stop the shared mock server."""
        cls.mockserver.__exit__(None, None, None)

    def _on_item_scraped(self, item):
        """``item_scraped`` handler: validate the item and record it."""
        assert isinstance(item, dict)
        assert item.get("pipeline_passed")
        self.items.append(item)

    def _create_crawler(self, pipeline_class):
        """Build a crawler for ``ItemSpider`` with only *pipeline_class* enabled.

        Connects ``_on_item_scraped`` to the ``item_scraped`` signal and
        resets ``self.items`` to an empty list.
        """
        settings = {
            "ITEM_PIPELINES": {pipeline_class: 1},
        }
        crawler = get_crawler(ItemSpider, settings)
        crawler.signals.connect(self._on_item_scraped, signals.item_scraped)
        self.items = []
        return crawler

    @defer.inlineCallbacks
    def test_simple_pipeline(self):
        """A synchronous pipeline yields exactly one scraped item."""
        crawler = self._create_crawler(SimplePipeline)
        yield crawler.crawl(mockserver=self.mockserver)
        assert len(self.items) == 1

    @defer.inlineCallbacks
    def test_deferred_pipeline(self):
        """A Deferred-returning pipeline yields exactly one scraped item."""
        crawler = self._create_crawler(DeferredPipeline)
        yield crawler.crawl(mockserver=self.mockserver)
        assert len(self.items) == 1

    @defer.inlineCallbacks
    def test_asyncdef_pipeline(self):
        """A coroutine pipeline yields exactly one scraped item."""
        crawler = self._create_crawler(AsyncDefPipeline)
        yield crawler.crawl(mockserver=self.mockserver)
        assert len(self.items) == 1

    @pytest.mark.only_asyncio
    @defer.inlineCallbacks
    def test_asyncdef_asyncio_pipeline(self):
        """A coroutine pipeline using asyncio awaitables yields one item."""
        crawler = self._create_crawler(AsyncDefAsyncioPipeline)
        yield crawler.crawl(mockserver=self.mockserver)
        assert len(self.items) == 1

    @pytest.mark.only_not_asyncio
    @defer.inlineCallbacks
    def test_asyncdef_not_asyncio_pipeline(self):
        """A coroutine pipeline awaiting a bare Deferred yields one item."""
        crawler = self._create_crawler(AsyncDefNotAsyncioPipeline)
        yield crawler.crawl(mockserver=self.mockserver)
        assert len(self.items) == 1