Mirror of https://github.com/scrapy/scrapy.git

Upgrade CI tools

Adrián Chaves 2023-02-02 06:37:40 +01:00
parent c34ca4aef5
commit b07d3f85a3
82 changed files with 15 additions and 189 deletions

.pre-commit-config.yaml
View File

@@ -5,14 +5,14 @@ repos:
   - id: bandit
     args: [-r, -c, .bandit.yml]
 - repo: https://github.com/PyCQA/flake8
-  rev: 6.0.0
+  rev: 5.0.4 # 6.0.0 drops Python 3.7 support
   hooks:
   - id: flake8
 - repo: https://github.com/psf/black.git
-  rev: 22.12.0
+  rev: 23.1.0
   hooks:
   - id: black
 - repo: https://github.com/pycqa/isort
-  rev: 5.12.0
+  rev: 5.11.5 # 5.12 drops Python 3.7 support
   hooks:
   - id: isort
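
Most of this commit's 82 touched files and 189 deleted lines follow from the black bump above: black 23.1.0's 2023 stable style removes blank lines that sit directly after a block opener (a class or def line), which is what the one-line deletions in the hunks below appear to be. A minimal before/after sketch (illustrative code, not taken from the repository):

    from scrapy.spiders import Spider

    # As black 22.12.0 left it: a blank line right after the class opener.
    class ExampleSpiderBefore(Spider):

        name = "example"

    # As black 23.1.0 rewrites it: the leading blank line is removed,
    # which shows up in the hunks below as a single deleted line.
    class ExampleSpiderAfter(Spider):
        name = "example"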

View File

@@ -18,7 +18,6 @@ from pathlib import Path
def main():
# Used for remembering the file (and its contents)
# so we don't have to open the same file again.
_filename = None
@@ -50,7 +49,6 @@ def main():
else:
# If this is a new file
if newfilename != _filename:
# Update the previous file
if _filename:
Path(_filename).write_text(_contents, encoding="utf-8")

View File

@@ -13,7 +13,6 @@ from scrapy.spiders import Spider
class QPSSpider(Spider):
name = "qps"
benchurl = "http://localhost:8880/"

View File

@@ -14,7 +14,6 @@ from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
class ScrapyCommand:
requires_project = False
crawler_process: Optional[CrawlerProcess] = None

View File

@@ -9,7 +9,6 @@ from scrapy.linkextractors import LinkExtractor
class Command(ScrapyCommand):
default_settings = {
"LOG_LEVEL": "INFO",
"LOGSTATS_INTERVAL": 1,

View File

@@ -3,7 +3,6 @@ from scrapy.exceptions import UsageError
class Command(BaseRunSpiderCommand):
requires_project = True
def syntax(self):

View File

@@ -6,7 +6,6 @@ from scrapy.exceptions import UsageError
class Command(ScrapyCommand):
requires_project = True
default_settings = {"LOG_ENABLED": False}

View File

@@ -10,7 +10,6 @@ from scrapy.utils.spider import DefaultSpider, spidercls_for_request
class Command(ScrapyCommand):
requires_project = False
def syntax(self):

View File

@@ -32,7 +32,6 @@ def extract_domain(url):
class Command(ScrapyCommand):
requires_project = False
default_settings = {"LOG_ENABLED": False}

View File

@@ -2,7 +2,6 @@ from scrapy.commands import ScrapyCommand
class Command(ScrapyCommand):
requires_project = True
default_settings = {"LOG_ENABLED": False}

View File

@@ -24,7 +24,6 @@ def _import_file(filepath: Union[str, PathLike]) -> ModuleType:
class Command(BaseRunSpiderCommand):
requires_project = False
default_settings = {"SPIDER_LOADER_WARN_ONLY": True}

View File

@@ -5,7 +5,6 @@ from scrapy.settings import BaseSettings
class Command(ScrapyCommand):
requires_project = False
default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}

View File

@@ -13,7 +13,6 @@ from scrapy.utils.url import guess_scheme
class Command(ScrapyCommand):
requires_project = False
default_settings = {
"KEEP_ALIVE": True,

View File

@@ -28,7 +28,6 @@ def _make_writable(path):
class Command(ScrapyCommand):
requires_project = False
default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}

View File

@@ -4,7 +4,6 @@ from scrapy.utils.versions import scrapy_components_versions
class Command(ScrapyCommand):
default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}
def syntax(self):

View File

@@ -69,7 +69,6 @@ def _get_concurrency_delay(concurrency, spider, settings):
class Downloader:
DOWNLOAD_SLOT = "download_slot"
def __init__(self, crawler):

View File

@@ -292,7 +292,6 @@ class ScrapyProxyAgent(Agent):
class ScrapyAgent:
_Agent = Agent
_ProxyAgent = ScrapyProxyAgent
_TunnelingAgent = TunnelingAgent

View File

@@ -17,7 +17,6 @@ from scrapy.utils.defer import deferred_from_coro, mustbe_deferred
class DownloaderMiddlewareManager(MiddlewareManager):
component_name = "downloader middleware"
@classmethod

View File

@@ -40,7 +40,6 @@ def _parse(url):
class ScrapyHTTPPageGetter(HTTPClient):
delimiter = b"\n"
def connectionMade(self):
@@ -103,7 +102,6 @@ class ScrapyHTTPPageGetter(HTTPClient):
# Twisted (https://github.com/twisted/twisted/pull/643), we merged its
# non-overridden code into this class.
class ScrapyHTTPClientFactory(ClientFactory):
protocol = ScrapyHTTPPageGetter
waiting = 1

View File

@@ -46,7 +46,6 @@ def _isiterable(o) -> bool:
class SpiderMiddlewareManager(MiddlewareManager):
component_name = "spider middleware"
def __init__(self, *middlewares):

View File

@@ -30,7 +30,6 @@ class AjaxCrawlMiddleware:
return cls(crawler.settings)
def process_response(self, request, response, spider):
if not isinstance(response, HtmlResponse) or response.status != 200:
return response

View File

@@ -27,7 +27,6 @@ HttpCacheMiddlewareTV = TypeVar("HttpCacheMiddlewareTV", bound="HttpCacheMiddlew
class HttpCacheMiddleware:
DOWNLOAD_EXCEPTIONS = (
defer.TimeoutError,
TimeoutError,

View File

@@ -53,7 +53,6 @@ class HttpCompressionMiddleware:
request.headers.setdefault("Accept-Encoding", b", ".join(ACCEPTED_ENCODINGS))
def process_response(self, request, response, spider):
if request.method == "HEAD":
return response
if isinstance(response, Response):

View File

@@ -26,7 +26,6 @@ def _build_redirect_request(source_request, *, url, **kwargs):
class BaseRedirectMiddleware:
enabled_setting = "REDIRECT_ENABLED"
def __init__(self, settings):
@@ -115,7 +114,6 @@ class RedirectMiddleware(BaseRedirectMiddleware):
class MetaRefreshMiddleware(BaseRedirectMiddleware):
enabled_setting = "METAREFRESH_ENABLED"
def __init__(self, settings):

View File

@@ -122,7 +122,6 @@ def get_retry_request(
class RetryMiddleware:
# IOError is raised by the HttpCompression middleware when trying to
# decompress an empty response
EXCEPTIONS_TO_RETRY = (

View File

@@ -8,7 +8,6 @@ from scrapy.utils.conf import build_component_list
class ExtensionManager(MiddlewareManager):
component_name = "extension"
@classmethod

View File

@@ -41,7 +41,6 @@ class DummyPolicy:
class RFC2616Policy:
MAXAGE = 3600 * 24 * 365 # one year
def __init__(self, settings):

View File

@@ -15,7 +15,6 @@ from scrapy.utils.deprecate import create_deprecated_class
class JsonRequest(Request):
attributes: Tuple[str, ...] = Request.attributes + ("dumps_kwargs",)
def __init__(self, *args, dumps_kwargs: Optional[dict] = None, **kwargs) -> None:

View File

@@ -29,7 +29,6 @@ _NONE = object()
class TextResponse(Response):
_DEFAULT_ENCODING = "ascii"
_cached_decoded_json = _NONE

View File

@@ -194,7 +194,6 @@ class LxmlLinkExtractor:
return True
def matches(self, url):
if self.allow_domains and not url_is_from_any_domain(url, self.allow_domains):
return False
if self.deny_domains and url_is_from_any_domain(url, self.deny_domains):

View File

@@ -10,7 +10,6 @@ from scrapy.utils.defer import deferred_f_from_coro_f
class ItemPipelineManager(MiddlewareManager):
component_name = "item pipeline"
@classmethod

View File

@@ -187,7 +187,6 @@ class S3FilesStore:
class GCSFilesStore:
GCS_PROJECT_ID = None
CACHE_CONTROL = "max-age=172800"
@@ -253,7 +252,6 @@ class GCSFilesStore:
class FTPFilesStore:
FTP_USERNAME = None
FTP_PASSWORD = None
USE_ACTIVE_MODE = None

View File

@@ -23,7 +23,6 @@ def _DUMMY_CALLBACK(response):
class MediaPipeline:
LOG_FAILED_RESULTS = True
class SpiderInfo:

View File

@@ -12,7 +12,6 @@ from scrapy.utils.python import binary_is_text, to_bytes, to_unicode
class ResponseTypes:
CLASSES = {
"text/html": "scrapy.http.HtmlResponse",
"application/atom+xml": "scrapy.http.XmlResponse",

View File

@@ -25,7 +25,6 @@ from scrapy.utils.response import open_in_browser
class Shell:
relevant_classes = (Crawler, Spider, Request, Response, Settings)
def __init__(self, crawler, update_vars=None, code=None):

View File

@@ -34,7 +34,6 @@ POLICY_SCRAPY_DEFAULT = "scrapy-default"
class ReferrerPolicy:
NOREFERRER_SCHEMES: Tuple[str, ...] = LOCAL_SCHEMES
name: str

View File

@@ -60,7 +60,6 @@ class Rule:
class CrawlSpider(Spider):
rules: Sequence[Rule] = ()
def __init__(self, *a, **kw):

View File

@@ -89,7 +89,7 @@ class XMLFeedSpider(Spider):
             yield node

     def _register_namespaces(self, selector):
-        for (prefix, uri) in self.namespaces:
+        for prefix, uri in self.namespaces:
             selector.register_namespace(prefix, uri)

View File

@@ -10,7 +10,6 @@ logger = logging.getLogger(__name__)
class SitemapSpider(Spider):
sitemap_urls = ()
sitemap_rules = [("", "parse")]
sitemap_follow = [""]

View File

@@ -6,7 +6,6 @@ from twisted.web.server import Site
class Root(Resource):
isLeaf = True
def getChild(self, name, request):

View File

@@ -11,7 +11,6 @@ from collections.abc import Mapping
class CaselessDict(dict):
__slots__ = ()
def __init__(self, seq=None):

View File

@@ -54,7 +54,6 @@ def create_deprecated_class(
"""
class DeprecatedClass(new_class.__class__):
deprecated_class = None
warned_on_subclass = False

View File

@@ -9,7 +9,6 @@ from scrapy.http import Request, Response
class ScrapyJSONEncoder(json.JSONEncoder):
DATE_FORMAT = "%Y-%m-%d"
TIME_FORMAT = "%H:%M:%S"

View File

@@ -5,7 +5,6 @@ from twisted.internet import defer, protocol
class ProcessTest:
command = None
prefix = [sys.executable, "-m", "scrapy.cmdline"]
cwd = os.getcwd() # trial chdirs to temp dir

View File

@@ -144,7 +144,6 @@ def strip_url(
origin_only=False,
strip_fragment=True,
):
"""Strip URL string from some of its components:
- ``strip_credentials`` removes "user:password@"

View File

@@ -95,7 +95,6 @@ class BrokenDownloadResource(resource.Resource):
class LeafResource(resource.Resource):
isLeaf = True
def deferRequest(self, request, delay, f, *a, **kw):

tests/requirements.txt
View File

@@ -2,7 +2,7 @@
 attrs
 pyftpdlib
 pytest
-pytest-cov==3.0.0
+pytest-cov==4.0.0
 pytest-xdist
 sybil >= 1.3.0 # https://github.com/cjw296/sybil/issues/20#issuecomment-605433422
 testfixtures

View File

@@ -25,7 +25,6 @@ class MockServerSpider(Spider):
class MetaSpider(MockServerSpider):
name = "meta"
def __init__(self, *args, **kwargs):
@@ -37,7 +36,6 @@ class MetaSpider(MockServerSpider):
class FollowAllSpider(MetaSpider):
name = "follow"
link_extractor = LinkExtractor()
@@ -59,7 +57,6 @@ class FollowAllSpider(MetaSpider):
class DelaySpider(MetaSpider):
name = "delay"
def __init__(self, n=1, b=0, *args, **kwargs):
@@ -81,7 +78,6 @@ class DelaySpider(MetaSpider):
class SimpleSpider(MetaSpider):
name = "simple"
def __init__(self, url="http://localhost:8998", *args, **kwargs):
@@ -93,7 +89,6 @@ class SimpleSpider(MetaSpider):
class AsyncDefSpider(SimpleSpider):
name = "asyncdef"
async def parse(self, response):
@@ -102,7 +97,6 @@ class AsyncDefSpider(SimpleSpider):
class AsyncDefAsyncioSpider(SimpleSpider):
name = "asyncdef_asyncio"
async def parse(self, response):
@@ -112,7 +106,6 @@ class AsyncDefAsyncioSpider(SimpleSpider):
class AsyncDefAsyncioReturnSpider(SimpleSpider):
name = "asyncdef_asyncio_return"
async def parse(self, response):
@@ -123,7 +116,6 @@ class AsyncDefAsyncioReturnSpider(SimpleSpider):
class AsyncDefAsyncioReturnSingleElementSpider(SimpleSpider):
name = "asyncdef_asyncio_return_single_element"
async def parse(self, response):
@@ -134,7 +126,6 @@ class AsyncDefAsyncioReturnSingleElementSpider(SimpleSpider):
class AsyncDefAsyncioReqsReturnSpider(SimpleSpider):
name = "asyncdef_asyncio_reqs_return"
async def parse(self, response):
@@ -191,7 +182,6 @@ class AsyncDefDeferredMaybeWrappedSpider(SimpleSpider):
class AsyncDefAsyncioGenSpider(SimpleSpider):
name = "asyncdef_asyncio_gen"
async def parse(self, response):
@@ -201,7 +191,6 @@ class AsyncDefAsyncioGenSpider(SimpleSpider):
class AsyncDefAsyncioGenLoopSpider(SimpleSpider):
name = "asyncdef_asyncio_gen_loop"
async def parse(self, response):
@@ -212,7 +201,6 @@ class AsyncDefAsyncioGenLoopSpider(SimpleSpider):
class AsyncDefAsyncioGenComplexSpider(SimpleSpider):
name = "asyncdef_asyncio_gen_complex"
initial_reqs = 4
following_reqs = 3
@@ -246,7 +234,6 @@ class AsyncDefAsyncioGenComplexSpider(SimpleSpider):
class ItemSpider(FollowAllSpider):
name = "item"
def parse(self, response):
@@ -261,7 +248,6 @@ class DefaultError(Exception):
class ErrorSpider(FollowAllSpider):
name = "error"
exception_cls = DefaultError
@@ -275,7 +261,6 @@ class ErrorSpider(FollowAllSpider):
class BrokenStartRequestsSpider(FollowAllSpider):
fail_before_yield = False
fail_yielding = False
@@ -305,7 +290,6 @@ class BrokenStartRequestsSpider(FollowAllSpider):
class SingleRequestSpider(MetaSpider):
seed = None
callback_func = None
errback_func = None
@@ -451,7 +435,6 @@ class CrawlSpiderWithProcessRequestCallbackKeywordArguments(CrawlSpiderWithParse
class BytesReceivedCallbackSpider(MetaSpider):
full_response_length = 2**18
@classmethod

View File

@@ -2,7 +2,6 @@ from tests.test_commands import CommandTest
class CheckCommandTest(CommandTest):
command = "check"
def setUp(self):

View File

@@ -6,7 +6,6 @@ from scrapy.utils.testsite import SiteTest
class FetchTest(ProcessTest, SiteTest, unittest.TestCase):
command = "fetch"
@defer.inlineCallbacks

View File

@@ -9,7 +9,6 @@ from tests import NON_EXISTING_RESOLVABLE, tests_datadir
class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
command = "shell"
@defer.inlineCallbacks

View File

@@ -8,7 +8,6 @@ from scrapy.utils.testproc import ProcessTest
class VersionTest(ProcessTest, unittest.TestCase):
command = "version"
@defer.inlineCallbacks

View File

@@ -223,7 +223,6 @@ def get_permissions_dict(
class StartprojectTemplatesTest(ProjectTest):
maxDiff = None
def setUp(self):
@@ -604,7 +603,6 @@ class MiscCommandsTest(CommandTest):
class RunSpiderCommandTest(CommandTest):
spider_filename = "myspider.py"
debug_log_spider = """
@@ -873,7 +871,6 @@ class MySpider(scrapy.Spider):
@skipIf(platform.system() != "Windows", "Windows required for .pyw files")
class WindowsRunSpiderCommandTest(RunSpiderCommandTest):
spider_filename = "myspider.pyw"
def setUp(self):

View File

@@ -25,7 +25,6 @@ from tests.test_downloader_handlers import (
@skipIf(not H2_ENABLED, "HTTP/2 support in Twisted is not enabled")
class Https2TestCase(Https11TestCase):
scheme = "https"
HTTP2_DATALOSS_SKIP_REASON = "Content-Length mismatch raises InvalidBodyLengthError"

View File

@@ -16,7 +16,6 @@ from scrapy.utils.test import get_crawler, get_from_asyncio_queue
class ManagerTestCase(TestCase):
settings_dict = None
def setUp(self):

View File

@@ -17,7 +17,6 @@ def _test_data(formats):
class DecompressionMiddlewareTest(TestCase):
test_formats = ["tar", "xml.bz2", "xml.gz", "zip"]
uncompressed_body, test_responses = _test_data(test_formats)

View File

@@ -14,7 +14,6 @@ from scrapy.utils.test import get_crawler
class _BaseTest(unittest.TestCase):
storage_class = "scrapy.extensions.httpcache.DbmCacheStorage"
policy_class = "scrapy.extensions.httpcache.RFC2616Policy"
@@ -146,12 +145,10 @@ class DefaultStorageTest(_BaseTest):
class DbmStorageTest(DefaultStorageTest):
storage_class = "scrapy.extensions.httpcache.DbmCacheStorage"
class DbmStorageWithCustomDbmModuleTest(DbmStorageTest):
dbm_module = "tests.mocks.dummydbm"
def _get_settings(self, **new_settings):
@@ -165,7 +162,6 @@ class DbmStorageWithCustomDbmModuleTest(DbmStorageTest):
class FilesystemStorageTest(DefaultStorageTest):
storage_class = "scrapy.extensions.httpcache.FilesystemCacheStorage"
@@ -176,7 +172,6 @@ class FilesystemStorageGzipTest(FilesystemStorageTest):
class DummyPolicyTest(_BaseTest):
policy_class = "scrapy.extensions.httpcache.DummyPolicy"
def test_middleware(self):
@@ -270,7 +265,6 @@ class DummyPolicyTest(_BaseTest):
class RFC2616PolicyTest(DefaultStorageTest):
policy_class = "scrapy.extensions.httpcache.RFC2616Policy"
def _process_requestresponse(self, mw, request, response):

View File

@@ -13,7 +13,6 @@ spider = Spider("foo")
class TestHttpProxyMiddleware(TestCase):
failureException = AssertionError
def setUp(self):

View File

@@ -127,7 +127,6 @@ class RetryTest(unittest.TestCase):
class MaxRetryTimesTest(unittest.TestCase):
invalid_url = "http://www.scrapytest.org/invalid_url"
def get_spider_and_middleware(self, settings=None):

View File

@@ -243,7 +243,6 @@ class CrawlerRun:
class EngineTest(unittest.TestCase):
@defer.inlineCallbacks
def test_crawler(self):
for spider in (
TestSpider,
DictItemsSpider,

View File

@@ -55,7 +55,6 @@ class CustomFieldDataclass:
class BaseItemExporterTest(unittest.TestCase):
item_class = TestItem
custom_field_item_class = CustomFieldItem
@@ -513,13 +512,11 @@ class XmlItemExporterTest(BaseItemExporterTest):
class XmlItemExporterDataclassTest(XmlItemExporterTest):
item_class = TestDataClass
custom_field_item_class = CustomFieldDataclass
class JsonLinesItemExporterTest(BaseItemExporterTest):
_expected_nested = {
"name": "Jesus",
"age": {"name": "Maria", "age": {"name": "Joseph", "age": "22"}},
@@ -559,13 +556,11 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):
class JsonLinesItemExporterDataclassTest(JsonLinesItemExporterTest):
item_class = TestDataClass
custom_field_item_class = CustomFieldDataclass
class JsonItemExporterTest(JsonLinesItemExporterTest):
_expected_nested = [JsonLinesItemExporterTest._expected_nested]
def _get_exporter(self, **kwargs):
@@ -627,13 +622,11 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
class JsonItemExporterDataclassTest(JsonItemExporterTest):
item_class = TestDataClass
custom_field_item_class = CustomFieldDataclass
class CustomExporterItemTest(unittest.TestCase):
item_class = TestItem
def setUp(self):
@@ -664,7 +657,6 @@ class CustomExporterItemTest(unittest.TestCase):
class CustomExporterDataclassTest(CustomExporterItemTest):
item_class = TestDataClass

View File

@@ -1068,7 +1068,6 @@ class FeedExportTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_export_multiple_item_classes(self):
items = [
self.MyItem({"foo": "bar1", "egg": "spam1"}),
self.MyItem2({"hello": "world2", "foo": "bar2"}),
@@ -1711,7 +1710,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_gzip_plugin(self):
filename = self._named_tempfile("gzip_file")
settings = {
@@ -1731,7 +1729,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_gzip_plugin_compresslevel(self):
filename_to_compressed = {
self._named_tempfile("compresslevel_0"): self.get_gzip_compressed(
self.expected, compresslevel=0
@@ -1839,7 +1836,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_lzma_plugin(self):
filename = self._named_tempfile("lzma_file")
settings = {
@@ -1859,7 +1855,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_lzma_plugin_format(self):
filename_to_compressed = {
self._named_tempfile("format_FORMAT_XZ"): lzma.compress(
self.expected, format=lzma.FORMAT_XZ
@@ -1893,7 +1888,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_lzma_plugin_check(self):
filename_to_compressed = {
self._named_tempfile("check_CHECK_NONE"): lzma.compress(
self.expected, check=lzma.CHECK_NONE
@@ -1927,7 +1921,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_lzma_plugin_preset(self):
filename_to_compressed = {
self._named_tempfile("preset_PRESET_0"): lzma.compress(
self.expected, preset=0
@@ -1986,7 +1979,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_bz2_plugin(self):
filename = self._named_tempfile("bz2_file")
settings = {
@@ -2006,7 +1998,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_bz2_plugin_compresslevel(self):
filename_to_compressed = {
self._named_tempfile("compresslevel_1"): bz2.compress(
self.expected, compresslevel=1
@@ -2056,7 +2047,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_custom_plugin_with_parameter(self):
expected = b"foo\r\n\nbar\r\n\n"
filename = self._named_tempfile("newline")
@@ -2075,7 +2065,6 @@ class FeedPostProcessedExportsTest(FeedExportTestBase):
@defer.inlineCallbacks
def test_custom_plugin_with_compression(self):
expected = b"foo\r\n\nbar\r\n\n"
filename_to_decompressor = {
@@ -2555,7 +2544,6 @@ class BatchDeliveriesTest(FeedExportTestBase):
]
class CustomS3FeedStorage(S3FeedStorage):
stubs = []
def open(self, *args, **kwargs):
@@ -2828,7 +2816,6 @@ class FTPFeedStoragePreFeedOptionsTest(unittest.TestCase):
class URIParamsTest:
spider_name = "uri_params_spider"
deprecated_options = False

View File

@@ -19,7 +19,6 @@ from scrapy.utils.python import to_bytes, to_unicode
class RequestTest(unittest.TestCase):
request_class = Request
default_method = "GET"
default_headers = {}
@@ -424,7 +423,6 @@ class RequestTest(unittest.TestCase):
class FormRequestTest(RequestTest):
request_class = FormRequest
def assertQueryEqual(self, first, second, msg=None):
@@ -1447,7 +1445,6 @@ def _qs(req, encoding="utf-8", to_unicode=False):
class XmlRpcRequestTest(RequestTest):
request_class = XmlRpcRequest
default_method = "POST"
default_headers = {b"Content-Type": [b"text/xml"]}

View File

@@ -23,7 +23,6 @@ from tests import get_testdata
class BaseResponseTest(unittest.TestCase):
response_class = Response
def test_init(self):
@@ -349,7 +348,6 @@ class BaseResponseTest(unittest.TestCase):
class TextResponseTest(BaseResponseTest):
response_class = TextResponse
def test_replace(self):
@@ -835,11 +833,9 @@ class TextResponseTest(BaseResponseTest):
class HtmlResponseTest(TextResponseTest):
response_class = HtmlResponse
def test_html_encoding(self):
body = b"""<html><head><title>Some page</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head><body>Price: \xa3100</body></html>'
@@ -878,7 +874,6 @@ class HtmlResponseTest(TextResponseTest):
class XmlResponseTest(TextResponseTest):
response_class = XmlResponse
def test_xml_encoding(self):

View File

@@ -87,7 +87,6 @@ class BasicItemLoaderTest(unittest.TestCase):
class InitializationTestMixin:
item_class = None
def test_keep_single_value(self):

View File

@@ -469,7 +469,6 @@ class BasicItemLoaderTest(unittest.TestCase):
class InitializationFromDictTest(unittest.TestCase):
item_class = dict
def test_keep_single_value(self):

View File

@@ -16,7 +16,6 @@ from tests.spiders import ItemSpider
class CustomItem(Item):
name = Field()
def __str__(self):

View File

@@ -116,7 +116,6 @@ class FileDownloadCrawlTestCase(TestCase):
self.assertTrue((self.tmpmediastore / i["path"]).exists())
def _assert_files_download_failure(self, crawler, items, code, logs):
# check that the item does NOT have the "images/files" field populated
self.assertEqual(len(items), 1)
self.assertIn(self.media_key, items[0])
@@ -205,7 +204,6 @@ else:
class ImageDownloadCrawlTestCase(FileDownloadCrawlTestCase):
skip = skip_pillow
pipeline_class = "scrapy.pipelines.images.ImagesPipeline"

View File

@@ -33,7 +33,6 @@ else:
class ImagesPipelineTestCase(unittest.TestCase):
skip = skip_pillow
def setUp(self):
@@ -325,7 +324,6 @@ class DeprecatedImagesPipeline(ImagesPipeline):
class ImagesPipelineTestCaseFieldsMixin:
skip = skip_pillow
def test_item_fields_default(self):
@@ -420,7 +418,6 @@ class ImagesPipelineTestCaseFieldsAttrsItem(
class ImagesPipelineTestCaseCustomSettings(unittest.TestCase):
skip = skip_pillow
img_cls_attribute_names = [

View File

@@ -37,7 +37,6 @@ def _mocked_download_func(request, info):
class BaseMediaPipelineTestCase(unittest.TestCase):
pipeline_class = MediaPipeline
settings = None
@@ -213,7 +212,6 @@ class MockedMediaPipeline(MediaPipeline):
class MediaPipelineTestCase(BaseMediaPipelineTestCase):
pipeline_class = MockedMediaPipeline
def _callback(self, result):

View File

@@ -154,7 +154,6 @@ class KeywordArgumentsSpider(MockServerSpider):
class CallbackKeywordArgumentsTestCase(TestCase):
maxDiff = None
def setUp(self):

View File

@@ -43,7 +43,6 @@ class MockDownloader:
class MockCrawler(Crawler):
def __init__(self, priority_queue_cls, jobdir):
settings = dict(
SCHEDULER_DEBUG=False,
SCHEDULER_DISK_QUEUE="scrapy.squeues.PickleLifoDiskQueue",
@@ -325,7 +324,6 @@ class TestIntegrationWithDownloaderAwareInMemory(TestCase):
@defer.inlineCallbacks
def test_integration_downloader_aware_priority_queue(self):
with MockServer() as mockserver:
url = mockserver.url("/status?n=200", is_secure=False)
start_urls = [url] * 6
yield self.crawler.crawl(start_urls)

View File

@@ -93,7 +93,6 @@ class BaseSettingsTest(unittest.TestCase):
with mock.patch.object(attr, "__setattr__") as mock_setattr, mock.patch.object(
attr, "set"
) as mock_set:
self.settings.attributes = {"TEST_OPTION": attr}
for priority in (0, 10, 20):

View File

@@ -26,7 +26,6 @@ from tests import get_testdata
class SpiderTest(unittest.TestCase):
spider_class = Spider
def setUp(self):
@@ -115,12 +114,10 @@ class SpiderTest(unittest.TestCase):
class InitSpiderTest(SpiderTest):
spider_class = InitSpider
class XMLFeedSpiderTest(SpiderTest):
spider_class = XMLFeedSpider
def test_register_namespace(self):
@@ -174,7 +171,6 @@ class XMLFeedSpiderTest(SpiderTest):
class CSVFeedSpiderTest(SpiderTest):
spider_class = CSVFeedSpider
def test_parse_rows(self):
@@ -196,7 +192,6 @@ class CSVFeedSpiderTest(SpiderTest):
class CrawlSpiderTest(SpiderTest):
test_body = b"""<html><head><title>Page title<title>
<body>
<p><a href="item/12.html">Item 12</a></p>
@@ -210,7 +205,6 @@ class CrawlSpiderTest(SpiderTest):
spider_class = CrawlSpider
def test_rule_without_link_extractor(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -234,7 +228,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_links(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -261,7 +254,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_links_filter(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -290,7 +282,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_links_generator(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -318,7 +309,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_request(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -347,7 +337,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_request_with_response(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -383,7 +372,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_request_instance_method(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -410,7 +398,6 @@ class CrawlSpiderTest(SpiderTest):
)
def test_process_request_instance_method_with_response(self):
response = HtmlResponse(
"http://example.org/somepage/index.html", body=self.test_body
)
@@ -467,7 +454,6 @@ class CrawlSpiderTest(SpiderTest):
class SitemapSpiderTest(SpiderTest):
spider_class = SitemapSpider
BODY = b"SITEMAP"
@@ -689,7 +675,6 @@ class DeprecationTest(unittest.TestCase):
class NoParseMethodSpiderTest(unittest.TestCase):
spider_class = Spider
def test_undefined_parse_method(self):

View File

@@ -114,13 +114,11 @@ class SpiderLoaderTest(unittest.TestCase):
self.assertEqual(crawler.spidercls.name, "spider1")
def test_bad_spider_modules_exception(self):
module = "tests.test_spiderloader.test_spiders.doesnotexist"
settings = Settings({"SPIDER_MODULES": [module]})
self.assertRaises(ImportError, SpiderLoader.from_settings, settings)
def test_bad_spider_modules_warning(self):
with warnings.catch_warnings(record=True) as w:
module = "tests.test_spiderloader.test_spiders.doesnotexist"
settings = Settings(

View File

@@ -31,7 +31,6 @@ from scrapy.spiders import Spider
class TestRefererMiddleware(TestCase):
req_meta = {}
resp_headers = {}
settings = {}
@@ -51,7 +50,6 @@ class TestRefererMiddleware(TestCase):
return Response(origin, headers=self.resp_headers)
def test(self):
for origin, target, referrer in self.scenarii:
response = self.get_response(origin)
request = self.get_request(target)
@@ -770,7 +768,6 @@ class TestRequestMetaPrecedence003(MixinUnsafeUrl, TestRefererMiddleware):
class TestRequestMetaSettingFallback(TestCase):
params = [
(
# When an unknown policy is referenced in Request.meta
@@ -824,7 +821,6 @@
]
def test(self):
origin = "http://www.scrapy.org"
target = "http://www.example.com"
@@ -923,7 +919,6 @@ class TestPolicyHeaderPrecedence004(
class TestReferrerOnRedirect(TestRefererMiddleware):
settings = {"REFERRER_POLICY": "scrapy.spidermiddlewares.referer.UnsafeUrlPolicy"}
scenarii = [
(
@@ -966,7 +961,6 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
self.redirectmw = RedirectMiddleware(settings)
def test(self):
for (
parent,
target,

View File

@@ -73,7 +73,6 @@ class ChunkSize4MarshalFifoDiskQueueTest(MarshalFifoDiskQueueTest):
class PickleFifoDiskQueueTest(t.FifoDiskQueueTest, FifoDiskQueueTestMixin):
chunksize = 100000
def queue(self):

View File

@@ -20,7 +20,6 @@ except ImportError:
class UtilsConsoleTestCase(unittest.TestCase):
def test_get_shell_embed_func(self):
shell = get_shell_embed_func(["invalid"])
self.assertEqual(shell, None)
@@ -30,14 +29,12 @@ class UtilsConsoleTestCase(unittest.TestCase):
@unittest.skipIf(not bpy, "bpython not available in testenv")
def test_get_shell_embed_func2(self):
shell = get_shell_embed_func(["bpython"])
self.assertTrue(callable(shell))
self.assertEqual(shell.__name__, "_embed_bpython_shell")
@unittest.skipIf(not ipy, "IPython not available in testenv")
def test_get_shell_embed_func3(self):
# default shell should be 'ipython'
shell = get_shell_embed_func()
self.assertEqual(shell.__name__, "_embed_ipython_shell")

View File

@@ -7,7 +7,6 @@ from tests import get_testdata
class XmliterTestCase(unittest.TestCase):
xmliter = staticmethod(xmliter)
def test_xmliter(self):

View File

@@ -462,7 +462,6 @@ class BackwardCompatibilityTestCase(unittest.TestCase):
warning to be logged."""
class RequestFingerprinter:
cache = WeakKeyDictionary()
def fingerprint(self, request):
@@ -641,7 +640,6 @@ class CustomRequestFingerprinterTestCase(unittest.TestCase):
def test_from_crawler_and_settings(self):
class RequestFingerprinter:
# This method is ignored due to the presence of from_crawler
@classmethod
def from_settings(cls, settings):

View File

@@ -16,7 +16,6 @@ class UtilsRenderTemplateFileTestCase(unittest.TestCase):
rmtree(self.tmp_path)
def test_simple_render(self):
context = dict(project_name="proj", name="spi", classname="TheSpider")
template = "from ${project_name}.spiders.${name} import ${classname}"
rendered = "from proj.spiders.spi import TheSpider"

tox.ini (36 changed lines)
View File

@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.

 [tox]
-envlist = security,flake8,black,typing,py
+envlist = pre-commit,typing,py
 minversion = 1.7.0

 [testenv]
@@ -37,44 +37,34 @@ install_command =
 [testenv:typing]
 basepython = python3
 deps =
-    lxml-stubs==0.2.0
+    lxml-stubs==0.4.0
     mypy==0.991
     types-attrs==19.1.0
-    types-pyOpenSSL==21.0.0
-    types-setuptools==57.0.0
+    types-pyOpenSSL==23.0.0.2
+    types-setuptools==65.7.0.3
 commands =
     mypy --show-error-codes {posargs: scrapy tests}

-[testenv:security]
+[testenv:pre-commit]
 basepython = python3
 deps =
-    bandit==1.7.4
+    pre-commit
 commands =
-    bandit -r -c .bandit.yml {posargs:scrapy}
-
-[testenv:flake8]
-basepython = python3
-deps =
-    {[testenv]deps}
-    # Twisted[http2] is required to import some files
-    Twisted[http2]>=17.9.0
-    flake8==6.0.0
-commands =
-    flake8 {posargs:docs scrapy tests}
+    pre-commit run {posargs:--all-files}

 [testenv:pylint]
 basepython = python3
 deps =
     {[testenv:extra-deps]deps}
-    pylint==2.15.6
+    pylint==2.16.0
 commands =
     pylint conftest.py docs extras scrapy setup.py tests

 [testenv:twinecheck]
 basepython = python3
 deps =
-    twine==4.0.1
-    build==0.9.0
+    twine==4.0.2
+    build==0.10.0
 commands =
     python -m build --sdist
     twine check dist/*
@@ -195,9 +185,3 @@ deps = {[docs]deps}
 setenv = {[docs]setenv}
 commands =
     sphinx-build -W -b linkcheck . {envtmpdir}/linkcheck
-
-[testenv:black]
-deps =
-    black==22.12.0
-commands =
-    black {posargs:--check .}
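
With the separate security, flake8 and black environments gone, lint tooling now has a single entry point, and the tool versions are pinned only in .pre-commit-config.yaml. A usage sketch (assuming tox and pre-commit are installed; these invocations are not spelled out in the diff itself):

    tox -e pre-commit            # runs the hooks through the new tox environment
    pre-commit run --all-files   # what that environment executes under the hood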