1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-03-13 19:04:50 +00:00
scrapy/tests/test_contracts.py

417 lines
13 KiB
Python
Raw Normal View History

2014-06-03 15:26:01 +02:00
from unittest import TextTestResult
2018-08-15 20:24:00 +03:00
from twisted.internet import defer
2018-08-09 21:07:25 +03:00
from twisted.python import failure
from twisted.trial import unittest
from scrapy import FormRequest
2018-08-15 20:24:00 +03:00
from scrapy.crawler import CrawlerRunner
2018-08-09 21:07:25 +03:00
from scrapy.spidermiddlewares.httperror import HttpError
from scrapy.spiders import Spider
from scrapy.http import Request
from scrapy.item import Item, Field
2018-08-11 17:50:56 +03:00
from scrapy.contracts import ContractsManager, Contract
from scrapy.contracts.default import (
UrlContract,
2019-08-31 02:44:09 -03:00
CallbackKeywordArgumentsContract,
ReturnsContract,
ScrapesContract,
)
2018-08-15 20:39:43 +03:00
from tests.mockserver import MockServer
class TestItem(Item):
    """Item carrying the two fields that the @scrapes contracts check for."""

    name = Field()
    url = Field()
2020-03-16 16:12:46 -03:00
class ResponseMock:
    """Bare-bones stand-in for a scrapy Response: exposes only ``url``."""

    url = 'http://scrapy.org'
2018-08-11 17:50:56 +03:00
class CustomSuccessContract(Contract):
    """Contract that pins every generated request to a fixed URL."""

    name = 'custom_success_contract'

    def adjust_request_args(self, args):
        # Mutate the mapping in place (callers rely on getting it back).
        args.update(url='http://scrapy.org')
        return args
class CustomFailContract(Contract):
    """Contract whose argument adjustment always raises TypeError."""

    name = 'custom_fail_contract'

    def adjust_request_args(self, args):
        # Fail unconditionally so tests can observe an errored contract.
        message = 'Error in adjust_request_args'
        raise TypeError(message)
class CustomFormContract(Contract):
    """Contract that builds a FormRequest carrying fixed form data."""

    name = 'custom_form'
    request_cls = FormRequest

    def adjust_request_args(self, args):
        # Supply the payload the FormRequest will submit; mutate in place.
        args.update(formdata={'name': 'scrapy'})
        return args
class TestSpider(Spider):
    # NOTE: the method docstrings below are not plain documentation -- the
    # ContractsManager parses their @-prefixed lines as contract annotations,
    # so their exact text is test input and must not be edited.
    name = 'demo_spider'

    def returns_request(self, response):
        """ method which returns request
        @url http://scrapy.org
        @returns requests 1
        """
        return Request('http://scrapy.org', callback=self.returns_item)

    def returns_item(self, response):
        """ method which returns item
        @url http://scrapy.org
        @returns items 1 1
        """
        return TestItem(url=response.url)

    def returns_request_cb_kwargs(self, response, url):
        """ method which returns request
        @url https://example.org
        @cb_kwargs {"url": "http://scrapy.org"}
        @returns requests 1
        """
        # `url` is supplied by the @cb_kwargs contract above.
        return Request(url, callback=self.returns_item_cb_kwargs)

    def returns_item_cb_kwargs(self, response, name):
        """ method which returns item
        @url http://scrapy.org
        @cb_kwargs {"name": "Scrapy"}
        @returns items 1 1
        """
        return TestItem(name=name, url=response.url)

    def returns_item_cb_kwargs_error_unexpected_keyword(self, response):
        """ method which returns item
        @url http://scrapy.org
        @cb_kwargs {"arg": "value"}
        @returns items 1 1
        """
        # Contract supplies "arg" but the signature does not accept it:
        # calling this callback with the cb_kwargs must error.
        return TestItem(url=response.url)

    def returns_item_cb_kwargs_error_missing_argument(self, response, arg):
        """ method which returns item
        @url http://scrapy.org
        @returns items 1 1
        """
        # Signature requires "arg" but no @cb_kwargs contract provides it:
        # calling this callback must error.
        return TestItem(url=response.url)

    def returns_dict_item(self, response):
        """ method which returns item
        @url http://scrapy.org
        @returns items 1 1
        """
        return {"url": response.url}

    def returns_fail(self, response):
        """ method which returns item
        @url http://scrapy.org
        @returns items 0 0
        """
        # Returns one item while the contract allows zero, so it must fail.
        return TestItem(url=response.url)

    def returns_dict_fail(self, response):
        """ method which returns item
        @url http://scrapy.org
        @returns items 0 0
        """
        # Dict-item variant of returns_fail.
        return {'url': response.url}

    def scrapes_item_ok(self, response):
        """ returns item with name and url
        @url http://scrapy.org
        @returns items 1 1
        @scrapes name url
        """
        return TestItem(name='test', url=response.url)

    def scrapes_dict_item_ok(self, response):
        """ returns item with name and url
        @url http://scrapy.org
        @returns items 1 1
        @scrapes name url
        """
        return {'name': 'test', 'url': response.url}

    def scrapes_item_fail(self, response):
        """ returns item with no name
        @url http://scrapy.org
        @returns items 1 1
        @scrapes name url
        """
        # Missing the "name" field required by @scrapes, so it must fail.
        return TestItem(url=response.url)

    def scrapes_dict_item_fail(self, response):
        """ returns item with no name
        @url http://scrapy.org
        @returns items 1 1
        @scrapes name url
        """
        return {'url': response.url}

    def scrapes_multiple_missing_fields(self, response):
        """ returns item with no name
        @url http://scrapy.org
        @returns items 1 1
        @scrapes name url
        """
        # Missing both @scrapes fields; the failure message should list both.
        return {}

    def parse_no_url(self, response):
        """ method with no url
        @returns items 1 1
        """
        pass

    def custom_form(self, response):
        """
        @url http://scrapy.org
        @custom_form
        """
        pass
2018-08-11 17:50:56 +03:00
class CustomContractSuccessSpider(Spider):
    # Spider whose only callback uses CustomSuccessContract, which supplies
    # the URL itself, so no @url contract is needed.
    name = 'custom_contract_success_spider'

    def parse(self, response):
        """
        @custom_success_contract
        """
        pass
class CustomContractFailSpider(Spider):
    # Spider whose only callback uses CustomFailContract, whose
    # adjust_request_args raises and should surface as an error.
    name = 'custom_contract_fail_spider'

    def parse(self, response):
        """
        @custom_fail_contract
        """
        pass
class InheritsTestSpider(TestSpider):
    # Subclass used to verify contracts are picked up from inherited methods.
    name = 'inherits_demo_spider'
class ContractsManagerTest(unittest.TestCase):
    # Contract classes registered with the manager under test: the default
    # contracts plus the custom ones defined above.
    contracts = [
        UrlContract,
        CallbackKeywordArgumentsContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        # Fresh manager and result collector per test so outcomes don't leak
        # between test methods.
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False, verbosity=0)

    def should_succeed(self):
        # Assert no contract failures or errors were recorded so far.
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        # Assert a contract failure (but no error) was recorded.
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_error(self):
        # Assert at least one error was recorded.
        self.assertTrue(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        contracts = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_cb_kwargs(self):
        spider = TestSpider()
        response = ResponseMock()

        # extract contracts correctly
        contracts = self.conman.extract_contracts(spider.returns_request_cb_kwargs)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, CallbackKeywordArgumentsContract, ReturnsContract]))

        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, CallbackKeywordArgumentsContract, ReturnsContract]))

        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs_error_unexpected_keyword)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, CallbackKeywordArgumentsContract, ReturnsContract]))

        # this method has no @cb_kwargs docstring line, hence only 2 contracts
        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs_error_missing_argument)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, ReturnsContract]))

        # returns_request
        request = self.conman.from_method(spider.returns_request_cb_kwargs, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item
        request = self.conman.from_method(spider.returns_item_cb_kwargs, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item (error, callback doesn't take keyword arguments)
        request = self.conman.from_method(spider.returns_item_cb_kwargs_error_unexpected_keyword, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

        # returns_item (error, contract doesn't provide keyword arguments)
        request = self.conman.from_method(spider.returns_item_cb_kwargs_error_missing_argument, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = self.conman.from_method(spider.returns_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_dict_item
        request = self.conman.from_method(spider.returns_dict_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_request
        request = self.conman.from_method(spider.returns_request, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_fail
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

        # returns_dict_fail
        request = self.conman.from_method(spider.returns_dict_fail, self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_dict_item_ok
        request = self.conman.from_method(spider.scrapes_dict_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_item_fail
        request = self.conman.from_method(spider.scrapes_item_fail, self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_dict_item_fail
        request = self.conman.from_method(spider.scrapes_dict_item_fail, self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_multiple_missing_fields: every missing @scrapes field must
        # appear in the recorded failure message.
        request = self.conman.from_method(spider.scrapes_multiple_missing_fields, self.results)
        request.callback(response)
        self.should_fail()
        message = 'ContractFail: Missing fields: name, url'
        assert message in self.results.failures[-1][-1]

    def test_custom_contracts(self):
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()

        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        # Build a real twisted Failure by raising inside a try/except so the
        # failure carries a live traceback, then feed it to the errback.
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        # An errback invocation records an error, never a contract failure.
        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    @defer.inlineCallbacks
    def test_same_url(self):
        # Two callbacks sharing one contract URL: both requests must actually
        # be scheduled and their callbacks invoked (no dupe-filtering).

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                # Counts callback invocations across both parse methods.
                self.visited = 0

            def start_requests(s):
                # `s` is the spider instance; `self` (closed over) is the
                # test case, providing the contracts manager and results.
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            # Both callbacks get an identical @url contract docstring,
            # assigned at runtime because the URL is only known now.
            contract_doc = f'@url {mockserver.url("/status?n=200")}'

            TestSameUrlSpider.parse_first.__doc__ = contract_doc
            TestSameUrlSpider.parse_second.__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        spider = TestSpider()
        request = self.conman.from_method(spider.custom_form, self.results)
        # CustomFormContract swaps the request class, which implies POST.
        self.assertEqual(request.method, 'POST')
        self.assertIsInstance(request, FormRequest)

    def test_inherited_contracts(self):
        # Contracts declared on TestSpider methods must be found on a subclass.
        spider = InheritsTestSpider()
        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)