From 3b6f7ac9f2f5b48b9f2f3ce106d1205599d2164f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 22 Oct 2019 19:43:02 +0200 Subject: [PATCH 001/181] Use pylint --- .travis.yml | 2 + docs/utils/linkfix.py | 85 ++++++++++++++++++++++------------------- pylintrc | 88 +++++++++++++++++++++++++++++++++++++++++++ tox.ini | 14 +++++++ 4 files changed, 150 insertions(+), 39 deletions(-) create mode 100644 pylintrc diff --git a/.travis.yml b/.travis.yml index 0190a7f4d..28a19f4f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,8 @@ branches: - /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/ matrix: include: + - env: TOXENV=pylint + python: 3.7 - env: TOXENV=py27 python: 2.7 - env: TOXENV=py27-pinned diff --git a/docs/utils/linkfix.py b/docs/utils/linkfix.py index 6290adbe2..9acfc3b23 100755 --- a/docs/utils/linkfix.py +++ b/docs/utils/linkfix.py @@ -14,50 +14,57 @@ Author: dufferzafar import re -# Used for remembering the file (and its contents) -# so we don't have to open the same file again. -_filename = None -_contents = None -# A regex that matches standard linkcheck output lines -line_re = re.compile(u'(.*)\:\d+\:\s\[(.*)\]\s(?:(.*)\sto\s(.*)|(.*))') +def main(): -# Read lines from the linkcheck output file -try: - with open("build/linkcheck/output.txt") as out: - output_lines = out.readlines() -except IOError: - print("linkcheck output not found; please run linkcheck first.") - exit(1) + # Used for remembering the file (and its contents) + # so we don't have to open the same file again. + _filename = None + _contents = None -# For every line, fix the respective file -for line in output_lines: - match = re.match(line_re, line) + # A regex that matches standard linkcheck output lines + line_re = re.compile(u'(.*)\:\d+\:\s\[(.*)\]\s(?:(.*)\sto\s(.*)|(.*))') - if match: - newfilename = match.group(1) - errortype = match.group(2) + # Read lines from the linkcheck output file + try: + with open("build/linkcheck/output.txt") as out: + output_lines = out.readlines() + except IOError: + print("linkcheck output not found; please run linkcheck first.") + exit(1) - # Broken links can't be fixed and - # I am not sure what do with the local ones. - if errortype.lower() in ["broken", "local"]: - print("Not Fixed: " + line) + # For every line, fix the respective file + for line in output_lines: + match = re.match(line_re, line) + + if match: + newfilename = match.group(1) + errortype = match.group(2) + + # Broken links can't be fixed and + # I am not sure what do with the local ones. + if errortype.lower() in ["broken", "local"]: + print("Not Fixed: " + line) + else: + # If this is a new file + if newfilename != _filename: + + # Update the previous file + if _filename: + with open(_filename, "w") as _file: + _file.write(_contents) + + _filename = newfilename + + # Read the new file to memory + with open(_filename) as _file: + _contents = _file.read() + + _contents = _contents.replace(match.group(3), match.group(4)) else: - # If this is a new file - if newfilename != _filename: + # We don't understand what the current line means! + print("Not Understood: " + line) - # Update the previous file - if _filename: - with open(_filename, "w") as _file: - _file.write(_contents) - _filename = newfilename - - # Read the new file to memory - with open(_filename) as _file: - _contents = _file.read() - - _contents = _contents.replace(match.group(3), match.group(4)) - else: - # We don't understand what the current line means! 
- print("Not Understood: " + line) +if __name__ == '__main__': + main() diff --git a/pylintrc b/pylintrc new file mode 100644 index 000000000..b83bc9f82 --- /dev/null +++ b/pylintrc @@ -0,0 +1,88 @@ +[MASTER] +persistent=no +jobs=1 # >1 hides results + +[MESSAGES CONTROL] +disable=abstract-method, + anomalous-backslash-in-string, + arguments-differ, + attribute-defined-outside-init, + bad-classmethod-argument, + bad-continuation, + bad-indentation, + bad-mcs-classmethod-argument, + bad-whitespace, + broad-except, + c-extension-no-member, + catching-non-exception, + cell-var-from-loop, + comparison-with-callable, + consider-using-in, + cyclic-import, + dangerous-default-value, + deprecated-method, + deprecated-module, + duplicate-code, # https://github.com/PyCQA/pylint/issues/214 + eval-used, + expression-not-assigned, + fixme, + function-redefined, + global-statement, + import-error, + import-outside-toplevel, + inconsistent-return-statements, + inherit-non-class, + invalid-name, + keyword-arg-before-vararg, + line-too-long, + logging-format-interpolation, + logging-not-lazy, + lost-exception, + method-hidden, + missing-docstring, + missing-final-newline, + multiple-imports, + multiple-statements, + no-else-continue, + no-else-raise, + no-else-return, + no-init, + no-member, + no-method-argument, + no-name-in-module, + no-self-argument, + no-self-use, + pointless-string-statement, + protected-access, + redefined-argument-from-local, + redefined-builtin, + redefined-outer-name, + reimported, + signature-differs, + super-init-not-called, + superfluous-parens, + too-few-public-methods, + too-many-ancestors, + too-many-arguments, + too-many-branches, + too-many-function-args, + too-many-instance-attributes, + too-many-locals, + too-many-return-statements, + trailing-newlines, + trailing-whitespace, + unexpected-special-method-signature, + ungrouped-imports, + unidiomatic-typecheck, + unnecessary-comprehension, + unnecessary-pass, + unsubscriptable-object, + unused-argument, + unused-import, + unused-variable, + unused-wildcard-import, + used-before-assignment, + useless-object-inheritance, # Required for Python 2 support + wildcard-import, + wrong-import-order, + wrong-import-position diff --git a/tox.ini b/tox.ini index ffe7360d3..e7d366fe9 100644 --- a/tox.ini +++ b/tox.ini @@ -98,6 +98,20 @@ deps = {[testenv:py35]deps} commands = py.test {posargs:scrapy tests} +[testenv:pylint] +basepython = python3.7 +deps = + {[testenv:py35]deps} + # Optional dependencies + boto + reppy + robotexclusionrulesparser + # Test dependencies + pylint + +commands = + pylint scrapy + [docs] changedir = docs deps = From 02577f55a0586bc3e6c13a4a3ea572c7eefc82b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 24 Oct 2019 13:25:11 +0200 Subject: [PATCH 002/181] Have PyLint cover all Python files in the repository --- pylintrc | 19 +++++++++++++++++++ tox.ini | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pylintrc b/pylintrc index b83bc9f82..ca3ea1c57 100644 --- a/pylintrc +++ b/pylintrc @@ -11,13 +11,18 @@ disable=abstract-method, bad-continuation, bad-indentation, bad-mcs-classmethod-argument, + bad-super-call, bad-whitespace, + blacklisted-name, broad-except, c-extension-no-member, catching-non-exception, cell-var-from-loop, comparison-with-callable, + consider-iterating-dictionary, consider-using-in, + consider-using-set-comprehension, + consider-using-sys-exit, cyclic-import, dangerous-default-value, deprecated-method, @@ -30,6 +35,7 @@ disable=abstract-method, 
global-statement, import-error, import-outside-toplevel, + import-self, inconsistent-return-statements, inherit-non-class, invalid-name, @@ -39,6 +45,7 @@ disable=abstract-method, logging-not-lazy, lost-exception, method-hidden, + misplaced-comparison-constant, missing-docstring, missing-final-newline, multiple-imports, @@ -52,6 +59,9 @@ disable=abstract-method, no-name-in-module, no-self-argument, no-self-use, + no-value-for-parameter, + not-callable, + pointless-statement, pointless-string-statement, protected-access, redefined-argument-from-local, @@ -59,6 +69,7 @@ disable=abstract-method, redefined-outer-name, reimported, signature-differs, + singleton-comparison, super-init-not-called, superfluous-parens, too-few-public-methods, @@ -67,15 +78,21 @@ disable=abstract-method, too-many-branches, too-many-function-args, too-many-instance-attributes, + too-many-lines, too-many-locals, + too-many-public-methods, too-many-return-statements, trailing-newlines, trailing-whitespace, + unbalanced-tuple-unpacking, + undefined-variable, unexpected-special-method-signature, ungrouped-imports, unidiomatic-typecheck, unnecessary-comprehension, + unnecessary-lambda, unnecessary-pass, + unreachable, unsubscriptable-object, unused-argument, unused-import, @@ -83,6 +100,8 @@ disable=abstract-method, unused-wildcard-import, used-before-assignment, useless-object-inheritance, # Required for Python 2 support + useless-return, + useless-super-delegation, wildcard-import, wrong-import-order, wrong-import-position diff --git a/tox.ini b/tox.ini index e7d366fe9..428571ef2 100644 --- a/tox.ini +++ b/tox.ini @@ -110,7 +110,7 @@ deps = pylint commands = - pylint scrapy + pylint conftest.py docs extras scrapy setup.py tests [docs] changedir = docs From c7f9b955bdf2405fce58907b0395abce2400a66d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 19 Dec 2019 12:44:52 +0100 Subject: [PATCH 003/181] Pylint: ignore not-an-iterable --- pylintrc | 1 + 1 file changed, 1 insertion(+) diff --git a/pylintrc b/pylintrc index ca3ea1c57..c52a4c2d0 100644 --- a/pylintrc +++ b/pylintrc @@ -60,6 +60,7 @@ disable=abstract-method, no-self-argument, no-self-use, no-value-for-parameter, + not-an-iterable, not-callable, pointless-statement, pointless-string-statement, From 80925ab845b7f55be97d9bb91015ceee90efc333 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 5 Aug 2019 11:39:07 -0300 Subject: [PATCH 004/181] Get server IP address for HTTP/1.1 responses --- docs/topics/request-response.rst | 12 +++++++++- scrapy/core/downloader/__init__.py | 2 +- scrapy/core/downloader/handlers/http11.py | 18 ++++++++++----- scrapy/http/response/__init__.py | 5 +++-- tests/test_crawl.py | 27 +++++++++++++++++++++++ 5 files changed, 54 insertions(+), 10 deletions(-) diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst index 8997a7f19..a4cc1a7d7 100644 --- a/docs/topics/request-response.rst +++ b/docs/topics/request-response.rst @@ -34,7 +34,7 @@ Request objects :type url: string :param callback: the function that will be called with the response of this - request (once its downloaded) as its first parameter. For more information + request (once it's downloaded) as its first parameter. For more information see :ref:`topics-request-response-ref-request-callback-arguments` below. If a Request doesn't specify a callback, the spider's :meth:`~scrapy.spiders.Spider.parse` method will be used. @@ -611,6 +611,12 @@ Response objects This represents the :class:`Request` that generated this response. 
:type request: :class:`Request` object + :param ip_address: The IP address of the server from which the Response originated. + :type ip_address: :class:`ipaddress.IPv4Address` object + + .. FIXME: Add ipaddress.IPv6Address once it's supported + + .. attribute:: Response.url A string containing the URL of the response. @@ -679,6 +685,10 @@ Response objects they're shown on the string representation of the Response (`__str__` method) which is used by the engine for logging. + .. attribute:: Response.ip_address + + The IP address of the server from which the Response originated. + .. method:: Response.copy() Returns a new Response which is a copy of this Response. diff --git a/scrapy/core/downloader/__init__.py b/scrapy/core/downloader/__init__.py index 157dc3418..11c9dd908 100644 --- a/scrapy/core/downloader/__init__.py +++ b/scrapy/core/downloader/__init__.py @@ -172,7 +172,7 @@ class Downloader(object): return response dfd.addCallback(_downloaded) - # 3. After response arrives, remove the request from transferring + # 3. After response arrives, remove the request from transferring # state to free up the transferring slot so it can be used by the # following requests (perhaps those which came from the downloader # middleware itself) diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 5a5f6cf0a..b690f439f 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -4,6 +4,7 @@ import logging import re import warnings from io import BytesIO +from ipaddress import ip_address from time import time from urllib.parse import urldefrag @@ -382,7 +383,7 @@ class ScrapyAgent(object): def _cb_bodyready(self, txresponse, request): # deliverBody hangs for responses without body if txresponse.length == 0: - return txresponse, b'', None + return txresponse, b'', None, None maxsize = request.meta.get('download_maxsize', self._maxsize) warnsize = request.meta.get('download_warnsize', self._warnsize) @@ -418,11 +419,11 @@ class ScrapyAgent(object): return d def _cb_bodydone(self, result, request, url): - txresponse, body, flags = result + txresponse, body, flags, ip_address = result status = int(txresponse.code) headers = Headers(txresponse.headers.getAllRawHeaders()) respcls = responsetypes.from_args(headers=headers, url=url, body=body) - return respcls(url=url, status=status, headers=headers, body=body, flags=flags) + return respcls(url=url, status=status, headers=headers, body=body, flags=flags, ip_address=ip_address) @implementer(IBodyProducer) @@ -456,6 +457,11 @@ class _ResponseReader(protocol.Protocol): self._fail_on_dataloss_warned = False self._reached_warnsize = False self._bytes_received = 0 + self._ip_address = None + + def connectionMade(self): + if self._ip_address is None: + self._ip_address = ip_address(self.transport._producer.getPeer().host) def dataReceived(self, bodyBytes): # This maybe called several times after cancel was called with buffered data. 
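Editor's sketch (not part of the patch series): the ipaddress.ip_address() helper used in connectionMade() above picks the address class from the string it receives, which is why Response.ip_address can end up holding either an IPv4 or an IPv6 address.

    from ipaddress import ip_address

    # ip_address() returns an IPv4Address or an IPv6Address depending on the input.
    assert type(ip_address("127.0.0.1")).__name__ == "IPv4Address"
    assert type(ip_address("::1")).__name__ == "IPv6Address"
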
@@ -488,16 +494,16 @@ class _ResponseReader(protocol.Protocol): body = self._bodybuf.getvalue() if reason.check(ResponseDone): - self._finished.callback((self._txresponse, body, None)) + self._finished.callback((self._txresponse, body, None, self._ip_address)) return if reason.check(PotentialDataLoss): - self._finished.callback((self._txresponse, body, ['partial'])) + self._finished.callback((self._txresponse, body, ['partial'], self._ip_address)) return if reason.check(ResponseFailed) and any(r.check(_DataLoss) for r in reason.value.reasons): if not self._fail_on_dataloss: - self._finished.callback((self._txresponse, body, ['dataloss'])) + self._finished.callback((self._txresponse, body, ['dataloss'], self._ip_address)) return elif not self._fail_on_dataloss_warned: diff --git a/scrapy/http/response/__init__.py b/scrapy/http/response/__init__.py index f92d0901c..ca5ecc02c 100644 --- a/scrapy/http/response/__init__.py +++ b/scrapy/http/response/__init__.py @@ -17,13 +17,14 @@ from scrapy.utils.trackref import object_ref class Response(object_ref): - def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None): + def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None, ip_address=None): self.headers = Headers(headers or {}) self.status = int(status) self._set_body(body) self._set_url(url) self.request = request self.flags = [] if flags is None else list(flags) + self.ip_address = ip_address @property def meta(self): @@ -76,7 +77,7 @@ class Response(object_ref): """Create a new Response with the same attributes except for those given new values. """ - for x in ['url', 'status', 'headers', 'body', 'request', 'flags']: + for x in ['url', 'status', 'headers', 'body', 'request', 'flags', 'ip_address']: kwargs.setdefault(x, getattr(self, x)) cls = kwargs.pop('cls', self.__class__) return cls(*args, **kwargs) diff --git a/tests/test_crawl.py b/tests/test_crawl.py index f433fcea6..6281160ae 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -1,5 +1,7 @@ import json import logging +from ipaddress import IPv4Address +from urllib.parse import urlparse from testfixtures import LogCapture from twisted.internet import defer @@ -308,3 +310,28 @@ with multiples lines self.assertIn("[callback] status 201", str(log)) self.assertIn("[errback] status 404", str(log)) self.assertIn("[errback] status 500", str(log)) + + @defer.inlineCallbacks + def test_dns_server_ip_address(self): + from socket import gethostbyname + + crawler = self.runner.create_crawler(SingleRequestSpider) + url = 'https://example.org' + yield crawler.crawl(seed=url) + ip_address = crawler.spider.meta['responses'][0].ip_address + self.assertIsInstance(ip_address, IPv4Address) + self.assertEqual(str(ip_address), gethostbyname(urlparse(url).netloc)) + + crawler = self.runner.create_crawler(SingleRequestSpider) + url = self.mockserver.url('/status?n=200') + yield crawler.crawl(seed=url, mockserver=self.mockserver) + ip_address = crawler.spider.meta['responses'][0].ip_address + self.assertIsNone(ip_address) + + crawler = self.runner.create_crawler(SingleRequestSpider) + url = self.mockserver.url('/echo?body=test') + expected_netloc, _ = urlparse(url).netloc.split(':') + yield crawler.crawl(seed=url, mockserver=self.mockserver) + ip_address = crawler.spider.meta['responses'][0].ip_address + self.assertIsInstance(ip_address, IPv4Address) + self.assertEqual(str(ip_address), gethostbyname(expected_netloc)) From e8da7e296691d2b4eb63e2a442bb600e03e5766f Mon Sep 17 00:00:00 2001 From: 
Eugenio Lacuesta Date: Sun, 26 Jan 2020 17:53:39 -0300 Subject: [PATCH 005/181] Test DNS resolution using CrawlerProcess --- tests/CrawlerProcess/ip_address.py | 51 ++++++++++++++++++++++++++++++ tests/test_crawl.py | 10 +----- tests/test_crawler.py | 8 +++++ 3 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 tests/CrawlerProcess/ip_address.py diff --git a/tests/CrawlerProcess/ip_address.py b/tests/CrawlerProcess/ip_address.py new file mode 100644 index 000000000..6b069cc90 --- /dev/null +++ b/tests/CrawlerProcess/ip_address.py @@ -0,0 +1,51 @@ +from urllib.parse import urlparse + +from twisted.internet import defer +from twisted.internet.base import ThreadedResolver +from twisted.internet.interfaces import IResolverSimple +from zope.interface.declarations import implementer + +from scrapy import Spider, Request +from scrapy.crawler import CrawlerProcess + +from tests.mockserver import MockServer + + +@implementer(IResolverSimple) +class MockThreadedResolver(ThreadedResolver): + """ + Resolves all names to localhost + """ + + @classmethod + def from_crawler(cls, crawler, reactor): + return cls(reactor) + + def install_on_reactor(self,): + self.reactor.installResolver(self) + + def getHostByName(self, name, timeout=None): + return defer.succeed("127.0.0.1") + + +class LocalhostSpider(Spider): + name = "localhost_spider" + + def start_requests(self): + yield Request(self.url) + + def parse(self, response): + netloc = urlparse(response.url).netloc + self.logger.info("Host: %s" % netloc.split(":")[0]) + self.logger.info("Type: %s" % type(response.ip_address)) + self.logger.info("IP address: %s" % response.ip_address) + + +with MockServer() as mockserver: + settings = {"DNS_RESOLVER": __name__ + ".MockThreadedResolver"} + process = CrawlerProcess(settings) + + port = urlparse(mockserver.http_address).port + url = "http://not.a.real.domain:{port}/echo?body=test".format(port=port) + process.crawl(LocalhostSpider, url=url) + process.start() diff --git a/tests/test_crawl.py b/tests/test_crawl.py index 6281160ae..9896058dc 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -1,6 +1,7 @@ import json import logging from ipaddress import IPv4Address +from socket import gethostbyname from urllib.parse import urlparse from testfixtures import LogCapture @@ -313,15 +314,6 @@ with multiples lines @defer.inlineCallbacks def test_dns_server_ip_address(self): - from socket import gethostbyname - - crawler = self.runner.create_crawler(SingleRequestSpider) - url = 'https://example.org' - yield crawler.crawl(seed=url) - ip_address = crawler.spider.meta['responses'][0].ip_address - self.assertIsInstance(ip_address, IPv4Address) - self.assertEqual(str(ip_address), gethostbyname(urlparse(url).netloc)) - crawler = self.runner.create_crawler(SingleRequestSpider) url = self.mockserver.url('/status?n=200') yield crawler.crawl(seed=url, mockserver=self.mockserver) diff --git a/tests/test_crawler.py b/tests/test_crawler.py index 0ce0674de..dfc1cf448 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -107,6 +107,7 @@ class CrawlerLoggingTestCase(unittest.TestCase): def test_spider_custom_settings_log_level(self): log_file = self.mktemp() + class MySpider(scrapy.Spider): name = 'spider' custom_settings = { @@ -323,3 +324,10 @@ class CrawlerProcessSubprocess(unittest.TestCase): "'downloader/exception_type_count/twisted.internet.error.ConnectionRefusedError': 1," in log, "'downloader/exception_type_count/twisted.internet.error.ConnectError': 1," in log, ])) + + def 
test_response_ip_address(self): + log = self.run_script("ip_address.py") + self.assertIn("Spider closed (finished)", log) + self.assertIn("Host: not.a.real.domain", log) + self.assertIn("Type: ", log) + self.assertIn("IP address: 127.0.0.1", log) From 8529dff41d3d2f6c81ee58c60b16dd9f2b8f72b4 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 26 Jan 2020 18:00:56 -0300 Subject: [PATCH 006/181] Update docs regarding Response.ip_address and IPv6 --- docs/topics/request-response.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst index a4cc1a7d7..17eb63064 100644 --- a/docs/topics/request-response.rst +++ b/docs/topics/request-response.rst @@ -612,10 +612,7 @@ Response objects :type request: :class:`Request` object :param ip_address: The IP address of the server from which the Response originated. - :type ip_address: :class:`ipaddress.IPv4Address` object - - .. FIXME: Add ipaddress.IPv6Address once it's supported - + :type ip_address: :class:`ipaddress.IPv4Address` or :class:`ipaddress.IPv6Address` .. attribute:: Response.url From 72b8613ee9827af031862bd84f1bea9acefcbebe Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 27 Nov 2019 14:46:20 -0300 Subject: [PATCH 007/181] bytes_received signal (no tests) --- docs/topics/signals.rst | 45 ++++++++++++++++------- scrapy/core/downloader/handlers/http11.py | 25 +++++++++++-- scrapy/signals.py | 1 + 3 files changed, 54 insertions(+), 17 deletions(-) diff --git a/docs/topics/signals.rst b/docs/topics/signals.rst index 3f29aa323..6efb73abb 100644 --- a/docs/topics/signals.rst +++ b/docs/topics/signals.rst @@ -73,7 +73,7 @@ engine_started Sent when the Scrapy engine has started crawling. - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. .. note:: This signal may be fired *after* the :signal:`spider_opened` signal, depending on how the spider was started. So **don't** rely on this signal @@ -88,7 +88,7 @@ engine_stopped Sent when the Scrapy engine is stopped (for example, when a crawling process has finished). - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. item_scraped ------------ @@ -99,7 +99,7 @@ item_scraped Sent when an item has been scraped, after it has passed all the :ref:`topics-item-pipeline` stages (without being dropped). - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. :param item: the item scraped :type item: dict or :class:`~scrapy.item.Item` object @@ -119,7 +119,7 @@ item_dropped Sent after an item has been dropped from the :ref:`topics-item-pipeline` when some stage raised a :exc:`~scrapy.exceptions.DropItem` exception. - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. :param item: the item dropped from the :ref:`topics-item-pipeline` :type item: dict or :class:`~scrapy.item.Item` object @@ -144,7 +144,7 @@ item_error Sent when a :ref:`topics-item-pipeline` generates an error (ie. raises an exception), except :exc:`~scrapy.exceptions.DropItem` exception. - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. 
:param item: the item dropped from the :ref:`topics-item-pipeline` :type item: dict or :class:`~scrapy.item.Item` object @@ -158,6 +158,23 @@ item_error :param failure: the exception raised :type failure: twisted.python.failure.Failure +bytes_received +-------------- + +.. signal:: bytes_received +.. function:: bytes_received(data, request) + + Sent by the HTTP 1.1 download handler when a group of bytes is + received for a specific request. + + This signal does not support returning deferreds from its handlers. + + :param data: the data received by the download handler + :type spider: :class:`bytes` object + + :param request: the request that generated the response + :type request: :class:`~scrapy.http.Request` object + spider_closed ------------- @@ -167,7 +184,7 @@ spider_closed Sent after a spider has been closed. This can be used to release per-spider resources reserved on :signal:`spider_opened`. - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. :param spider: the spider which has been closed :type spider: :class:`~scrapy.spiders.Spider` object @@ -191,7 +208,7 @@ spider_opened reserve per-spider resources, but can be used for any task that needs to be performed when a spider is opened. - This signal supports returning deferreds from their handlers. + This signal supports returning deferreds from its handlers. :param spider: the spider which has been opened :type spider: :class:`~scrapy.spiders.Spider` object @@ -215,7 +232,7 @@ spider_idle You may raise a :exc:`~scrapy.exceptions.DontCloseSpider` exception to prevent the spider from being closed. - This signal does not support returning deferreds from their handlers. + This signal does not support returning deferreds from its handlers. :param spider: the spider which has gone idle :type spider: :class:`~scrapy.spiders.Spider` object @@ -234,7 +251,7 @@ spider_error Sent when a spider callback generates an error (ie. raises an exception). - This signal does not support returning deferreds from their handlers. + This signal does not support returning deferreds from its handlers. :param failure: the exception raised :type failure: twisted.python.failure.Failure @@ -254,7 +271,7 @@ request_scheduled Sent when the engine schedules a :class:`~scrapy.http.Request`, to be downloaded later. - The signal does not support returning deferreds from their handlers. + The signal does not support returning deferreds from its handlers. :param request: the request that reached the scheduler :type request: :class:`~scrapy.http.Request` object @@ -271,7 +288,7 @@ request_dropped Sent when a :class:`~scrapy.http.Request`, scheduled by the engine to be downloaded later, is rejected by the scheduler. - The signal does not support returning deferreds from their handlers. + The signal does not support returning deferreds from its handlers. :param request: the request that reached the scheduler :type request: :class:`~scrapy.http.Request` object @@ -287,7 +304,7 @@ request_reached_downloader Sent when a :class:`~scrapy.http.Request` reached downloader. - The signal does not support returning deferreds from their handlers. + The signal does not support returning deferreds from its handlers. :param request: the request that reached downloader :type request: :class:`~scrapy.http.Request` object @@ -304,7 +321,7 @@ response_received Sent when the engine receives a new :class:`~scrapy.http.Response` from the downloader. 
- This signal does not support returning deferreds from their handlers. + This signal does not support returning deferreds from its handlers. :param response: the response received :type response: :class:`~scrapy.http.Response` object @@ -323,7 +340,7 @@ response_downloaded Sent by the downloader right after a ``HTTPResponse`` is downloaded. - This signal does not support returning deferreds from their handlers. + This signal does not support returning deferreds from its handlers. :param response: the response downloaded :type response: :class:`~scrapy.http.Response` object diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 5a5f6cf0a..92c3d5f5c 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -16,6 +16,7 @@ from twisted.web.http_headers import Headers as TxHeaders from twisted.web.iweb import IBodyProducer, UNKNOWN_LENGTH from zope.interface import implementer +from scrapy import signals from scrapy.core.downloader.tls import openssl_methods from scrapy.core.downloader.webclient import _parse from scrapy.exceptions import ScrapyDeprecationWarning @@ -32,6 +33,7 @@ class HTTP11DownloadHandler: lazy = False def __init__(self, settings, crawler=None): + self.crawler = crawler self._pool = HTTPConnectionPool(reactor, persistent=True) self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN') self._pool._factory.noisy = False @@ -76,6 +78,7 @@ class HTTP11DownloadHandler: maxsize=getattr(spider, 'download_maxsize', self._default_maxsize), warnsize=getattr(spider, 'download_warnsize', self._default_warnsize), fail_on_dataloss=self._fail_on_dataloss, + crawler=self.crawler, ) return agent.download_request(request) @@ -272,7 +275,7 @@ class ScrapyAgent(object): _TunnelingAgent = TunnelingAgent def __init__(self, contextFactory=None, connectTimeout=10, bindAddress=None, pool=None, - maxsize=0, warnsize=0, fail_on_dataloss=True): + maxsize=0, warnsize=0, fail_on_dataloss=True, crawler=None): self._contextFactory = contextFactory self._connectTimeout = connectTimeout self._bindAddress = bindAddress @@ -281,6 +284,7 @@ class ScrapyAgent(object): self._warnsize = warnsize self._fail_on_dataloss = fail_on_dataloss self._txresponse = None + self._crawler = crawler def _get_agent(self, request, timeout): bindaddress = request.meta.get('bindaddress') or self._bindAddress @@ -409,7 +413,15 @@ class ScrapyAgent(object): d = defer.Deferred(_cancel) txresponse.deliverBody( - _ResponseReader(d, txresponse, request, maxsize, warnsize, fail_on_dataloss) + _ResponseReader( + d, + txresponse, + request, + maxsize, + warnsize, + fail_on_dataloss, + self._crawler, + ) ) # save response for timeouts @@ -445,7 +457,7 @@ class _RequestBodyProducer(object): class _ResponseReader(protocol.Protocol): - def __init__(self, finished, txresponse, request, maxsize, warnsize, fail_on_dataloss): + def __init__(self, finished, txresponse, request, maxsize, warnsize, fail_on_dataloss, crawler): self._finished = finished self._txresponse = txresponse self._request = request @@ -456,6 +468,7 @@ class _ResponseReader(protocol.Protocol): self._fail_on_dataloss_warned = False self._reached_warnsize = False self._bytes_received = 0 + self._crawler = crawler def dataReceived(self, bodyBytes): # This maybe called several times after cancel was called with buffered data. 
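Editor's note (not part of the patch series): send_catch_log() is what makes it safe to fire a signal from inside dataReceived() in the next hunk — receiver exceptions are caught and logged instead of bubbling up into the download. A minimal, self-contained sketch; the signal object and handler below are made up for illustration:

    from scrapy.signalmanager import SignalManager

    bytes_received = object()          # stand-in for scrapy.signals.bytes_received
    manager = SignalManager()          # a Crawler exposes one of these as crawler.signals

    def on_bytes_received(data):
        print("handler saw %d bytes" % len(data))

    manager.connect(on_bytes_received, signal=bytes_received)
    # Any exception raised by a handler is logged, not propagated to the caller.
    manager.send_catch_log(signal=bytes_received, data=b"partial chunk")
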
@@ -465,6 +478,12 @@ class _ResponseReader(protocol.Protocol): self._bodybuf.write(bodyBytes) self._bytes_received += len(bodyBytes) + self._crawler.signals.send_catch_log( + signal=signals.bytes_received, + data=bodyBytes, + request=self._request, + ) + if self._maxsize and self._bytes_received > self._maxsize: logger.error("Received (%(bytes)s) bytes larger than download " "max size (%(maxsize)s) in request %(request)s.", diff --git a/scrapy/signals.py b/scrapy/signals.py index 6b9125302..590421893 100644 --- a/scrapy/signals.py +++ b/scrapy/signals.py @@ -16,6 +16,7 @@ request_dropped = object() request_reached_downloader = object() response_received = object() response_downloaded = object() +bytes_received = object() item_scraped = object() item_dropped = object() item_error = object() From cab449b1952020b86fbe2915a537150fc885c567 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Fri, 29 Nov 2019 11:37:40 -0300 Subject: [PATCH 008/181] Typo fix --- tests/test_engine.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index 25dee7c1f..9d68836cc 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -107,7 +107,7 @@ class CrawlerRun(object): self.reqreached = [] self.itemerror = [] self.itemresp = [] - self.signals_catched = {} + self.signals_caught = {} self.spider_class = spider_class def run(self): @@ -172,7 +172,7 @@ class CrawlerRun(object): signalargs = kwargs.copy() sig = signalargs.pop('signal') signalargs.pop('sender', None) - self.signals_catched[sig] = signalargs + self.signals_caught[sig] = signalargs class EngineTest(unittest.TestCase): @@ -186,7 +186,7 @@ class EngineTest(unittest.TestCase): self._assert_scheduled_requests(urls_to_visit=8) self._assert_downloaded_responses() self._assert_scraped_items() - self._assert_signals_catched() + self._assert_signals_caught() @defer.inlineCallbacks def test_crawler_dupefilter(self): @@ -263,19 +263,19 @@ class EngineTest(unittest.TestCase): self.assertEqual('Item 2 name', item['name']) self.assertEqual('200', item['price']) - def _assert_signals_catched(self): - assert signals.engine_started in self.run.signals_catched - assert signals.engine_stopped in self.run.signals_catched - assert signals.spider_opened in self.run.signals_catched - assert signals.spider_idle in self.run.signals_catched - assert signals.spider_closed in self.run.signals_catched + def _assert_signals_caught(self): + assert signals.engine_started in self.run.signals_caught + assert signals.engine_stopped in self.run.signals_caught + assert signals.spider_opened in self.run.signals_caught + assert signals.spider_idle in self.run.signals_caught + assert signals.spider_closed in self.run.signals_caught self.assertEqual({'spider': self.run.spider}, - self.run.signals_catched[signals.spider_opened]) + self.run.signals_caught[signals.spider_opened]) self.assertEqual({'spider': self.run.spider}, - self.run.signals_catched[signals.spider_idle]) + self.run.signals_caught[signals.spider_idle]) self.assertEqual({'spider': self.run.spider, 'reason': 'finished'}, - self.run.signals_catched[signals.spider_closed]) + self.run.signals_caught[signals.spider_closed]) @defer.inlineCallbacks def test_close_downloader(self): From bda37e38bd53d5aae691b56d4136fbff99f78158 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Fri, 29 Nov 2019 12:02:27 -0300 Subject: [PATCH 009/181] [Tests] bytes_received signal --- tests/test_engine.py | 53 +++++++++++++++++++++++++++++++++++++++----- 1 
file changed, 47 insertions(+), 6 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index 9d68836cc..b63c7e232 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -13,22 +13,24 @@ module with the ``runserver`` argument:: import os import re import sys +from collections import defaultdict from urllib.parse import urlparse from twisted.internet import reactor, defer -from twisted.web import server, static, util from twisted.trial import unittest +from twisted.web import server, static, util +from pydispatch import dispatcher from scrapy import signals from scrapy.core.engine import ExecutionEngine -from scrapy.utils.test import get_crawler -from pydispatch import dispatcher -from tests import tests_datadir -from scrapy.spiders import Spider +from scrapy.http import Request from scrapy.item import Item, Field from scrapy.linkextractors import LinkExtractor -from scrapy.http import Request +from scrapy.spiders import Spider from scrapy.utils.signal import disconnect_all +from scrapy.utils.test import get_crawler + +from tests import tests_datadir, get_testdata class TestItem(Item): @@ -107,6 +109,7 @@ class CrawlerRun(object): self.reqreached = [] self.itemerror = [] self.itemresp = [] + self.bytes = defaultdict(lambda: b"") self.signals_caught = {} self.spider_class = spider_class @@ -124,6 +127,7 @@ class CrawlerRun(object): self.crawler = get_crawler(self.spider_class) self.crawler.signals.connect(self.item_scraped, signals.item_scraped) self.crawler.signals.connect(self.item_error, signals.item_error) + self.crawler.signals.connect(self.bytes_received, signals.bytes_received) self.crawler.signals.connect(self.request_scheduled, signals.request_scheduled) self.crawler.signals.connect(self.request_dropped, signals.request_dropped) self.crawler.signals.connect(self.request_reached, signals.request_reached_downloader) @@ -155,6 +159,9 @@ class CrawlerRun(object): def item_scraped(self, item, spider, response): self.itemresp.append((item, response)) + def bytes_received(self, data, request): + self.bytes[request] += data + def request_scheduled(self, request, spider): self.reqplug.append((request, spider)) @@ -187,6 +194,7 @@ class EngineTest(unittest.TestCase): self._assert_downloaded_responses() self._assert_scraped_items() self._assert_signals_caught() + self._assert_bytes_received() @defer.inlineCallbacks def test_crawler_dupefilter(self): @@ -263,6 +271,39 @@ class EngineTest(unittest.TestCase): self.assertEqual('Item 2 name', item['name']) self.assertEqual('200', item['price']) + def _assert_bytes_received(self): + self.assertEqual(8, len(self.run.bytes)) + for request, data in self.run.bytes.items(): + if self.run.getpath(request.url) == "/": + self.assertEqual(data, get_testdata("test_site", "index.html")) + elif self.run.getpath(request.url) == "/item1.html": + self.assertEqual(data, get_testdata("test_site", "item1.html")) + elif self.run.getpath(request.url) == "/item2.html": + self.assertEqual(data, get_testdata("test_site", "item2.html")) + elif self.run.getpath(request.url) == "/redirected": + self.assertEqual(data, b"Redirected here") + elif self.run.getpath(request.url) == '/redirect': + self.assertEqual(data, + b"\n\n" + b" \n" + b" \n" + b" \n" + b" \n" + b" click here\n" + b" \n" + b"\n" + ) + elif self.run.getpath(request.url) == "/tem999.html": + self.assertEqual(data, + b"\n\n" + b" 404 - No Such Resource\n" + b" \n" + b"

<h1>No Such Resource</h1>\n" +                             b"      <p>File not found.</p>
\n" + b" \n" + b"\n" + ) + def _assert_signals_caught(self): assert signals.engine_started in self.run.signals_caught assert signals.engine_stopped in self.run.signals_caught From 89483ce9f709e230ee5ff9050d206430d2d17c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 3 Dec 2019 12:06:08 +0100 Subject: [PATCH 010/181] Fix Flake8 issues --- tests/test_engine.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index b63c7e232..c0769c992 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -283,7 +283,8 @@ class EngineTest(unittest.TestCase): elif self.run.getpath(request.url) == "/redirected": self.assertEqual(data, b"Redirected here") elif self.run.getpath(request.url) == '/redirect': - self.assertEqual(data, + self.assertEqual( + data, b"\n\n" b" \n" b" \n" @@ -294,7 +295,8 @@ class EngineTest(unittest.TestCase): b"\n" ) elif self.run.getpath(request.url) == "/tem999.html": - self.assertEqual(data, + self.assertEqual( + data, b"\n\n" b" 404 - No Such Resource\n" b" \n" From dbe20a863ff63dce937b2d3b159782d8268e6838 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 27 Jan 2020 12:21:18 -0300 Subject: [PATCH 011/181] bytes_received signal: send spider argument --- docs/topics/signals.rst | 5 ++++- scrapy/core/downloader/handlers/http11.py | 1 + tests/test_engine.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/topics/signals.rst b/docs/topics/signals.rst index 6efb73abb..3e70ca067 100644 --- a/docs/topics/signals.rst +++ b/docs/topics/signals.rst @@ -162,7 +162,7 @@ bytes_received -------------- .. signal:: bytes_received -.. function:: bytes_received(data, request) +.. function:: bytes_received(data, request, spider) Sent by the HTTP 1.1 download handler when a group of bytes is received for a specific request. 
@@ -175,6 +175,9 @@ bytes_received :param request: the request that generated the response :type request: :class:`~scrapy.http.Request` object + :param spider: the spider associated with the response + :type spider: :class:`~scrapy.spiders.Spider` object + spider_closed ------------- diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 92c3d5f5c..c53c9bb2d 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -482,6 +482,7 @@ class _ResponseReader(protocol.Protocol): signal=signals.bytes_received, data=bodyBytes, request=self._request, + spider=self._crawler.spider, ) if self._maxsize and self._bytes_received > self._maxsize: diff --git a/tests/test_engine.py b/tests/test_engine.py index c0769c992..57cc89ba3 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -159,7 +159,7 @@ class CrawlerRun(object): def item_scraped(self, item, spider, response): self.itemresp.append((item, response)) - def bytes_received(self, data, request): + def bytes_received(self, data, request, spider): self.bytes[request] += data def request_scheduled(self, request, spider): From 613fd41f44d1455f9c9369087958674f3fdfcc8d Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 27 Jan 2020 12:30:26 -0300 Subject: [PATCH 012/181] bytes_received signal: improve test performance --- tests/test_engine.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index 57cc89ba3..bb475958e 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -109,7 +109,7 @@ class CrawlerRun(object): self.reqreached = [] self.itemerror = [] self.itemresp = [] - self.bytes = defaultdict(lambda: b"") + self.bytes = defaultdict(lambda: list()) self.signals_caught = {} self.spider_class = spider_class @@ -160,7 +160,7 @@ class CrawlerRun(object): self.itemresp.append((item, response)) def bytes_received(self, data, request, spider): - self.bytes[request] += data + self.bytes[request].append(data) def request_scheduled(self, request, spider): self.reqplug.append((request, spider)) @@ -274,17 +274,18 @@ class EngineTest(unittest.TestCase): def _assert_bytes_received(self): self.assertEqual(8, len(self.run.bytes)) for request, data in self.run.bytes.items(): + joined_data = b"".join(data) if self.run.getpath(request.url) == "/": - self.assertEqual(data, get_testdata("test_site", "index.html")) + self.assertEqual(joined_data, get_testdata("test_site", "index.html")) elif self.run.getpath(request.url) == "/item1.html": - self.assertEqual(data, get_testdata("test_site", "item1.html")) + self.assertEqual(joined_data, get_testdata("test_site", "item1.html")) elif self.run.getpath(request.url) == "/item2.html": - self.assertEqual(data, get_testdata("test_site", "item2.html")) + self.assertEqual(joined_data, get_testdata("test_site", "item2.html")) elif self.run.getpath(request.url) == "/redirected": - self.assertEqual(data, b"Redirected here") + self.assertEqual(joined_data, b"Redirected here") elif self.run.getpath(request.url) == '/redirect': self.assertEqual( - data, + joined_data, b"\n\n" b" \n" b" \n" @@ -296,7 +297,7 @@ class EngineTest(unittest.TestCase): ) elif self.run.getpath(request.url) == "/tem999.html": self.assertEqual( - data, + joined_data, b"\n\n" b" 404 - No Such Resource\n" b" \n" From 4ffd18fb11ff89863569b8b4de44241e3ca2f86e Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 27 Jan 2020 13:29:33 -0300 Subject: [PATCH 013/181] 
[docs] Mention that signals.bytes_received could be fired multiple times --- docs/topics/signals.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/topics/signals.rst b/docs/topics/signals.rst index 3e70ca067..f490911f3 100644 --- a/docs/topics/signals.rst +++ b/docs/topics/signals.rst @@ -165,7 +165,8 @@ bytes_received .. function:: bytes_received(data, request, spider) Sent by the HTTP 1.1 download handler when a group of bytes is - received for a specific request. + received for a specific request. This signal might be fired + multiple times for the same request. This signal does not support returning deferreds from its handlers. From 2c9643d38cc076c4d2032efd994fda4cfcc9f88a Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 29 Jan 2020 14:11:56 -0300 Subject: [PATCH 014/181] Test: bytes_received signal fired multiple times --- tests/test_engine.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index bb475958e..3c5cc403b 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -12,6 +12,7 @@ module with the ``runserver`` argument:: import os import re +import string import sys from collections import defaultdict from urllib.parse import urlparse @@ -90,6 +91,7 @@ def start_test_site(debug=False): r = static.File(root_dir) r.putChild(b"redirect", util.Redirect(b"/redirected")) r.putChild(b"redirected", static.Data(b"Redirected here", "text/plain")) + r.putChild(b"random", static.Data(string.ascii_letters.encode("utf8") * 2**14, "text/plain")) port = reactor.listenTCP(0, server.Site(r), interface="127.0.0.1") if debug: @@ -117,8 +119,12 @@ class CrawlerRun(object): self.port = start_test_site() self.portno = self.port.getHost().port - start_urls = [self.geturl("/"), self.geturl("/redirect"), - self.geturl("/redirect")] # a duplicate + start_urls = [ + self.geturl("/"), + self.geturl("/redirect"), + self.geturl("/redirect"), # duplicate + self.geturl("/random"), + ] for name, signal in vars(signals).items(): if not name.startswith('_'): @@ -190,7 +196,7 @@ class EngineTest(unittest.TestCase): self.run = CrawlerRun(spider) yield self.run.run() self._assert_visited_urls() - self._assert_scheduled_requests(urls_to_visit=8) + self._assert_scheduled_requests(urls_to_visit=9) self._assert_downloaded_responses() self._assert_scraped_items() self._assert_signals_caught() @@ -200,7 +206,7 @@ class EngineTest(unittest.TestCase): def test_crawler_dupefilter(self): self.run = CrawlerRun(TestDupeFilterSpider) yield self.run.run() - self._assert_scheduled_requests(urls_to_visit=7) + self._assert_scheduled_requests(urls_to_visit=8) self._assert_dropped_requests() @defer.inlineCallbacks @@ -237,8 +243,8 @@ class EngineTest(unittest.TestCase): def _assert_downloaded_responses(self): # response tests - self.assertEqual(8, len(self.run.respplug)) - self.assertEqual(8, len(self.run.reqreached)) + self.assertEqual(9, len(self.run.respplug)) + self.assertEqual(9, len(self.run.reqreached)) for response, _ in self.run.respplug: if self.run.getpath(response.url) == '/item999.html': @@ -272,7 +278,7 @@ class EngineTest(unittest.TestCase): self.assertEqual('200', item['price']) def _assert_bytes_received(self): - self.assertEqual(8, len(self.run.bytes)) + self.assertEqual(9, len(self.run.bytes)) for request, data in self.run.bytes.items(): joined_data = b"".join(data) if self.run.getpath(request.url) == "/": @@ -306,6 +312,8 @@ class EngineTest(unittest.TestCase): b" \n" b"\n" ) + elif 
self.run.getpath(request.url) == "/random": + self.assertTrue(len(data) > 1) # signal was fired multiple times def _assert_signals_caught(self): assert signals.engine_started in self.run.signals_caught From a499f38b14d16338d20084c0dcb24528a1f1f22f Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 29 Jan 2020 14:35:17 -0300 Subject: [PATCH 015/181] Remove object parent class --- scrapy/core/downloader/handlers/http11.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index c53c9bb2d..6f1bd9ad6 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -268,7 +268,7 @@ class ScrapyProxyAgent(Agent): ) -class ScrapyAgent(object): +class ScrapyAgent: _Agent = Agent _ProxyAgent = ScrapyProxyAgent @@ -438,7 +438,7 @@ class ScrapyAgent(object): @implementer(IBodyProducer) -class _RequestBodyProducer(object): +class _RequestBodyProducer: def __init__(self, body): self.body = body From 6f02a8dccb95373f22bac18c08d9fda8169dcb02 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 29 Jan 2020 14:53:23 -0300 Subject: [PATCH 016/181] Add source parameter to bytes_received signal --- docs/topics/signals.rst | 12 ++++++++---- scrapy/core/downloader/handlers/http11.py | 18 +++++++++++++----- scrapy/core/downloader/handlers/s3.py | 1 + tests/test_downloader_handlers.py | 3 +++ tests/test_engine.py | 5 ++++- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/docs/topics/signals.rst b/docs/topics/signals.rst index f490911f3..3a15bf95c 100644 --- a/docs/topics/signals.rst +++ b/docs/topics/signals.rst @@ -162,11 +162,11 @@ bytes_received -------------- .. signal:: bytes_received -.. function:: bytes_received(data, request, spider) +.. function:: bytes_received(data, request, spider, source) - Sent by the HTTP 1.1 download handler when a group of bytes is - received for a specific request. This signal might be fired - multiple times for the same request. + Sent by the HTTP 1.1 and S3 download handlers when a group of bytes is + received for a specific request. This signal might be fired multiple + times for the same request, with partial data each time. This signal does not support returning deferreds from its handlers. 
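Editor's sketch (not part of the patch series): a handler that uses the new source argument to react only to chunks coming through one download handler; the function name is invented:

    def log_s3_chunks(data, request, spider, source):
        # "source" names the download handler that produced the chunk
        # ("http11" or "s3" at this point in the series).
        if source == "s3":
            spider.logger.debug("S3 chunk: %d bytes for %s", len(data), request.url)
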
@@ -179,6 +179,10 @@ bytes_received :param spider: the spider associated with the response :type spider: :class:`~scrapy.spiders.Spider` object + :param source: a string to identify which handler sent the signal + (current values could be "http11" or "s3") + :type source: :class:`str` object + spider_closed ------------- diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 6f1bd9ad6..49c9eacac 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -32,8 +32,9 @@ logger = logging.getLogger(__name__) class HTTP11DownloadHandler: lazy = False - def __init__(self, settings, crawler=None): + def __init__(self, settings, crawler=None, source="http11"): self.crawler = crawler + self.source = source self._pool = HTTPConnectionPool(reactor, persistent=True) self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN') self._pool._factory.noisy = False @@ -67,8 +68,8 @@ class HTTP11DownloadHandler: self._disconnect_timeout = 1 @classmethod - def from_crawler(cls, crawler): - return cls(crawler.settings, crawler) + def from_crawler(cls, crawler, **kwargs): + return cls(crawler.settings, crawler, **kwargs) def download_request(self, request, spider): """Return a deferred for the HTTP download""" @@ -79,6 +80,7 @@ class HTTP11DownloadHandler: warnsize=getattr(spider, 'download_warnsize', self._default_warnsize), fail_on_dataloss=self._fail_on_dataloss, crawler=self.crawler, + source=self.source, ) return agent.download_request(request) @@ -275,7 +277,7 @@ class ScrapyAgent: _TunnelingAgent = TunnelingAgent def __init__(self, contextFactory=None, connectTimeout=10, bindAddress=None, pool=None, - maxsize=0, warnsize=0, fail_on_dataloss=True, crawler=None): + maxsize=0, warnsize=0, fail_on_dataloss=True, crawler=None, source=None): self._contextFactory = contextFactory self._connectTimeout = connectTimeout self._bindAddress = bindAddress @@ -285,6 +287,7 @@ class ScrapyAgent: self._fail_on_dataloss = fail_on_dataloss self._txresponse = None self._crawler = crawler + self._source = source def _get_agent(self, request, timeout): bindaddress = request.meta.get('bindaddress') or self._bindAddress @@ -421,6 +424,7 @@ class ScrapyAgent: warnsize, fail_on_dataloss, self._crawler, + self._source, ) ) @@ -457,7 +461,9 @@ class _RequestBodyProducer: class _ResponseReader(protocol.Protocol): - def __init__(self, finished, txresponse, request, maxsize, warnsize, fail_on_dataloss, crawler): + def __init__( + self, finished, txresponse, request, maxsize, warnsize, fail_on_dataloss, crawler, source + ): self._finished = finished self._txresponse = txresponse self._request = request @@ -469,6 +475,7 @@ class _ResponseReader(protocol.Protocol): self._reached_warnsize = False self._bytes_received = 0 self._crawler = crawler + self._source = source def dataReceived(self, bodyBytes): # This maybe called several times after cancel was called with buffered data. 
@@ -483,6 +490,7 @@ class _ResponseReader(protocol.Protocol): data=bodyBytes, request=self._request, spider=self._crawler.spider, + source=self._source, ) if self._maxsize and self._bytes_received > self._maxsize: diff --git a/scrapy/core/downloader/handlers/s3.py b/scrapy/core/downloader/handlers/s3.py index 40a1fa48e..2366b6394 100644 --- a/scrapy/core/downloader/handlers/s3.py +++ b/scrapy/core/downloader/handlers/s3.py @@ -73,6 +73,7 @@ class S3DownloadHandler: objcls=httpdownloadhandler, settings=settings, crawler=crawler, + source="s3", ) self._download_http = _http_handler.download_request diff --git a/tests/test_downloader_handlers.py b/tests/test_downloader_handlers.py index 8d95d7cac..22a813647 100644 --- a/tests/test_downloader_handlers.py +++ b/tests/test_downloader_handlers.py @@ -730,6 +730,9 @@ class Http11ProxyTestCase(HttpProxyTestCase): class HttpDownloadHandlerMock: + def __init__(self, *args, **kwargs): + pass + def download_request(self, request, spider): return request diff --git a/tests/test_engine.py b/tests/test_engine.py index 3c5cc403b..c83a23b55 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -112,6 +112,7 @@ class CrawlerRun(object): self.itemerror = [] self.itemresp = [] self.bytes = defaultdict(lambda: list()) + self.bytes_source = set() self.signals_caught = {} self.spider_class = spider_class @@ -165,8 +166,9 @@ class CrawlerRun(object): def item_scraped(self, item, spider, response): self.itemresp.append((item, response)) - def bytes_received(self, data, request, spider): + def bytes_received(self, data, request, spider, source): self.bytes[request].append(data) + self.bytes_source.add(source) def request_scheduled(self, request, spider): self.reqplug.append((request, spider)) @@ -279,6 +281,7 @@ class EngineTest(unittest.TestCase): def _assert_bytes_received(self): self.assertEqual(9, len(self.run.bytes)) + self.assertEqual(self.run.bytes_source, set(["http11"])) for request, data in self.run.bytes.items(): joined_data = b"".join(data) if self.run.getpath(request.url) == "/": From a2ae380efcaa5a3419a4f6a35541ae0fb71a2e7f Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 3 Feb 2020 13:23:52 -0300 Subject: [PATCH 017/181] Remove unnecessary commas --- scrapy/resolver.py | 2 +- tests/CrawlerProcess/ip_address.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapy/resolver.py b/scrapy/resolver.py index 554a3a14d..f69894b1e 100644 --- a/scrapy/resolver.py +++ b/scrapy/resolver.py @@ -29,7 +29,7 @@ class CachingThreadedResolver(ThreadedResolver): cache_size = 0 return cls(reactor, cache_size, crawler.settings.getfloat('DNS_TIMEOUT')) - def install_on_reactor(self,): + def install_on_reactor(self): self.reactor.installResolver(self) def getHostByName(self, name, timeout=None): diff --git a/tests/CrawlerProcess/ip_address.py b/tests/CrawlerProcess/ip_address.py index 6b069cc90..949e97172 100644 --- a/tests/CrawlerProcess/ip_address.py +++ b/tests/CrawlerProcess/ip_address.py @@ -21,7 +21,7 @@ class MockThreadedResolver(ThreadedResolver): def from_crawler(cls, crawler, reactor): return cls(reactor) - def install_on_reactor(self,): + def install_on_reactor(self): self.reactor.installResolver(self) def getHostByName(self, name, timeout=None): From bb8f7dc609382153df79774ad9d8f6d33d064279 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 3 Feb 2020 14:50:14 -0300 Subject: [PATCH 018/181] Mock DNS server --- tests/mockserver.py | 90 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 
22 deletions(-) diff --git a/tests/mockserver.py b/tests/mockserver.py index a45277db9..585741f1b 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -1,3 +1,4 @@ +import argparse import json import os import random @@ -6,18 +7,19 @@ from subprocess import Popen, PIPE from urllib.parse import urlencode from OpenSSL import SSL -from twisted.web.server import Site, NOT_DONE_YET -from twisted.web.resource import Resource +from twisted.internet import defer, reactor, ssl +from twisted.internet.task import deferLater +from twisted.names import dns, error +from twisted.names.server import DNSServerFactory +from twisted.web.resource import EncodingResourceWrapper, Resource +from twisted.web.server import GzipEncoderFactory, NOT_DONE_YET, Site from twisted.web.static import File from twisted.web.test.test_webclient import PayloadResource -from twisted.web.server import GzipEncoderFactory -from twisted.web.resource import EncodingResourceWrapper from twisted.web.util import redirectTo -from twisted.internet import reactor, ssl -from twisted.internet.task import deferLater from scrapy.utils.python import to_bytes, to_unicode from scrapy.utils.ssl import SSL_OP_NO_TLSv1_3 +from scrapy.utils.test import get_testenv def getarg(request, name, default=None, type=None): @@ -198,12 +200,10 @@ class Root(Resource): return b'Scrapy mock HTTP server\n' -class MockServer(): +class MockServer: def __enter__(self): - from scrapy.utils.test import get_testenv - - self.proc = Popen([sys.executable, '-u', '-m', 'tests.mockserver'], + self.proc = Popen([sys.executable, '-u', '-m', 'tests.mockserver', '-t', 'http'], stdout=PIPE, env=get_testenv()) http_address = self.proc.stdout.readline().strip().decode('ascii') https_address = self.proc.stdout.readline().strip().decode('ascii') @@ -224,6 +224,37 @@ class MockServer(): return host + path +class MockDNSResolver: + """ + Implements twisted.internet.interfaces.IResolver partially + """ + + def _resolve(self, name): + record = dns.Record_A(address=b"127.0.0.1") + answer = dns.RRHeader(name=name, payload=record) + return [answer], [], [] + + def query(self, query, timeout=None): + if query.type == dns.A: + return defer.succeed(self._resolve(query.name.name)) + return defer.fail(error.DomainError()) + + def lookupAllRecords(self, name, timeout=None): + return defer.succeed(self._resolve(name)) + + +class MockDNSServer(): + + def __enter__(self): + self.proc = Popen([sys.executable, '-u', '-m', 'tests.mockserver', 'dns'], + stdout=PIPE, env=get_testenv()) + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.proc.kill() + self.proc.communicate() + + def ssl_context_factory(keyfile='keys/localhost.key', certfile='keys/localhost.crt', cipher_string=None): factory = ssl.DefaultOpenSSLContextFactory( os.path.join(os.path.dirname(__file__), keyfile), @@ -238,19 +269,34 @@ def ssl_context_factory(keyfile='keys/localhost.key', certfile='keys/localhost.c if __name__ == "__main__": - root = Root() - factory = Site(root) - httpPort = reactor.listenTCP(0, factory) - contextFactory = ssl_context_factory() - httpsPort = reactor.listenSSL(0, factory, contextFactory) + parser = argparse.ArgumentParser() + parser.add_argument("-t", "--type", type=str, choices=("http", "dns"), default="http") + args = parser.parse_args() - def print_listening(): - httpHost = httpPort.getHost() - httpsHost = httpsPort.getHost() - httpAddress = 'http://%s:%d' % (httpHost.host, httpHost.port) - httpsAddress = 'https://%s:%d' % (httpsHost.host, httpsHost.port) - 
print(httpAddress) - print(httpsAddress) + if args.type == "http": + root = Root() + factory = Site(root) + httpPort = reactor.listenTCP(0, factory) + contextFactory = ssl_context_factory() + httpsPort = reactor.listenSSL(0, factory, contextFactory) + + def print_listening(): + httpHost = httpPort.getHost() + httpsHost = httpsPort.getHost() + httpAddress = "http://%s:%d" % (httpHost.host, httpHost.port) + httpsAddress = "https://%s:%d" % (httpsHost.host, httpsHost.port) + print(httpAddress) + print(httpsAddress) + + elif args.type == "dns": + clients = [MockDNSResolver()] + factory = DNSServerFactory(clients=clients) + protocol = dns.DNSDatagramProtocol(controller=factory) + reactor.listenUDP(10053, protocol) + reactor.listenTCP(10053, factory) + + def print_listening(): + print("DNS server running on port 10053") reactor.callWhenRunning(print_listening) reactor.run() From 4851efdfb0885a40a44a2834c6c69d0104326801 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 3 Feb 2020 14:50:54 -0300 Subject: [PATCH 019/181] Flake8 adjustments --- tests/mockserver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/mockserver.py b/tests/mockserver.py index 585741f1b..67139534e 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -257,9 +257,9 @@ class MockDNSServer(): def ssl_context_factory(keyfile='keys/localhost.key', certfile='keys/localhost.crt', cipher_string=None): factory = ssl.DefaultOpenSSLContextFactory( - os.path.join(os.path.dirname(__file__), keyfile), - os.path.join(os.path.dirname(__file__), certfile), - ) + os.path.join(os.path.dirname(__file__), keyfile), + os.path.join(os.path.dirname(__file__), certfile), + ) if cipher_string: ctx = factory.getContext() # disabling TLS1.2+ because it unconditionally enables some strong ciphers From e0ef8ad2d6f958de6ce04cd7756e142efeb1a6a2 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 3 Feb 2020 15:52:15 -0300 Subject: [PATCH 020/181] CrawlerRunner test for Response.ip_address --- tests/CrawlerProcess/ip_address.py | 51 ------------------------------ tests/CrawlerRunner/ip_address.py | 37 ++++++++++++++++++++++ tests/mockserver.py | 11 ++++--- tests/test_crawler.py | 20 ++++++++---- 4 files changed, 57 insertions(+), 62 deletions(-) delete mode 100644 tests/CrawlerProcess/ip_address.py create mode 100644 tests/CrawlerRunner/ip_address.py diff --git a/tests/CrawlerProcess/ip_address.py b/tests/CrawlerProcess/ip_address.py deleted file mode 100644 index 949e97172..000000000 --- a/tests/CrawlerProcess/ip_address.py +++ /dev/null @@ -1,51 +0,0 @@ -from urllib.parse import urlparse - -from twisted.internet import defer -from twisted.internet.base import ThreadedResolver -from twisted.internet.interfaces import IResolverSimple -from zope.interface.declarations import implementer - -from scrapy import Spider, Request -from scrapy.crawler import CrawlerProcess - -from tests.mockserver import MockServer - - -@implementer(IResolverSimple) -class MockThreadedResolver(ThreadedResolver): - """ - Resolves all names to localhost - """ - - @classmethod - def from_crawler(cls, crawler, reactor): - return cls(reactor) - - def install_on_reactor(self): - self.reactor.installResolver(self) - - def getHostByName(self, name, timeout=None): - return defer.succeed("127.0.0.1") - - -class LocalhostSpider(Spider): - name = "localhost_spider" - - def start_requests(self): - yield Request(self.url) - - def parse(self, response): - netloc = urlparse(response.url).netloc - self.logger.info("Host: %s" % 
netloc.split(":")[0]) - self.logger.info("Type: %s" % type(response.ip_address)) - self.logger.info("IP address: %s" % response.ip_address) - - -with MockServer() as mockserver: - settings = {"DNS_RESOLVER": __name__ + ".MockThreadedResolver"} - process = CrawlerProcess(settings) - - port = urlparse(mockserver.http_address).port - url = "http://not.a.real.domain:{port}/echo?body=test".format(port=port) - process.crawl(LocalhostSpider, url=url) - process.start() diff --git a/tests/CrawlerRunner/ip_address.py b/tests/CrawlerRunner/ip_address.py new file mode 100644 index 000000000..5a71536d8 --- /dev/null +++ b/tests/CrawlerRunner/ip_address.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from twisted.internet import reactor +from twisted.names.client import createResolver + +from scrapy import Spider, Request +from scrapy.crawler import CrawlerRunner +from scrapy.utils.log import configure_logging + +from tests.mockserver import MockServer, MockDNSServer + + +class LocalhostSpider(Spider): + name = "localhost_spider" + + def start_requests(self): + yield Request(self.url) + + def parse(self, response): + netloc = urlparse(response.url).netloc + self.logger.info("Host: %s" % netloc.split(":")[0]) + self.logger.info("Type: %s" % type(response.ip_address)) + self.logger.info("IP address: %s" % response.ip_address) + + +with MockServer() as mock_http_server, MockDNSServer() as mock_dns_server: + port = urlparse(mock_http_server.http_address).port + url = "http://not.a.real.domain:{port}/echo".format(port=port) + + servers = [(mock_dns_server.host, mock_dns_server.port)] + reactor.installResolver(createResolver(servers=servers)) + + configure_logging() + runner = CrawlerRunner() + d = runner.crawl(LocalhostSpider, url=url) + d.addBoth(lambda _: reactor.stop()) + reactor.run() diff --git a/tests/mockserver.py b/tests/mockserver.py index 67139534e..08a81418c 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -246,8 +246,11 @@ class MockDNSResolver: class MockDNSServer(): def __enter__(self): - self.proc = Popen([sys.executable, '-u', '-m', 'tests.mockserver', 'dns'], + self.proc = Popen([sys.executable, '-u', '-m', 'tests.mockserver', '-t', 'dns'], stdout=PIPE, env=get_testenv()) + host, port = self.proc.stdout.readline().strip().decode('ascii').split(":") + self.host = host + self.port = int(port) return self def __exit__(self, exc_type, exc_value, traceback): @@ -292,11 +295,11 @@ if __name__ == "__main__": clients = [MockDNSResolver()] factory = DNSServerFactory(clients=clients) protocol = dns.DNSDatagramProtocol(controller=factory) - reactor.listenUDP(10053, protocol) - reactor.listenTCP(10053, factory) + listener = reactor.listenUDP(0, protocol) def print_listening(): - print("DNS server running on port 10053") + host = listener.getHost() + print("%s:%s" % (host.host, host.port)) reactor.callWhenRunning(print_listening) reactor.run() diff --git a/tests/test_crawler.py b/tests/test_crawler.py index dfc1cf448..5d381c368 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -281,9 +281,7 @@ class CrawlerRunnerHasSpider(unittest.TestCase): self.assertNotIn("Asyncio reactor is installed", str(log)) -class CrawlerProcessSubprocess(unittest.TestCase): - script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'CrawlerProcess') - +class ScriptRunnerMixin: def run_script(self, script_name): script_path = os.path.join(self.script_dir, script_name) args = (sys.executable, script_path) @@ -292,6 +290,10 @@ class CrawlerProcessSubprocess(unittest.TestCase): 
stdout, stderr = p.communicate() return stderr.decode('utf-8') + +class CrawlerProcessSubprocess(ScriptRunnerMixin, unittest.TestCase): + script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'CrawlerProcess') + def test_simple(self): log = self.run_script('simple.py') self.assertIn('Spider closed (finished)', log) @@ -325,9 +327,13 @@ class CrawlerProcessSubprocess(unittest.TestCase): "'downloader/exception_type_count/twisted.internet.error.ConnectError': 1," in log, ])) + +class CrawlerRunnerSubprocess(ScriptRunnerMixin, unittest.TestCase): + script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'CrawlerRunner') + def test_response_ip_address(self): log = self.run_script("ip_address.py") - self.assertIn("Spider closed (finished)", log) - self.assertIn("Host: not.a.real.domain", log) - self.assertIn("Type: ", log) - self.assertIn("IP address: 127.0.0.1", log) + self.assertIn("INFO: Spider closed (finished)", log) + self.assertIn("INFO: Host: not.a.real.domain", log) + self.assertIn("INFO: Type: ", log) + self.assertIn("INFO: IP address: 127.0.0.1", log) From 13670f0397ba8dcec3dceb1852bad5751406d19d Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 3 Feb 2020 16:16:43 -0300 Subject: [PATCH 021/181] Ignore tests/CrawlerRunner directory --- conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conftest.py b/conftest.py index c0de09909..55294feca 100644 --- a/conftest.py +++ b/conftest.py @@ -11,7 +11,8 @@ collect_ignore = [ # not a test, but looks like a test "scrapy/utils/testsite.py", # contains scripts to be run by tests/test_crawler.py::CrawlerProcessSubprocess - *_py_files("tests/CrawlerProcess") + *_py_files("tests/CrawlerProcess"), + *_py_files("tests/CrawlerRunner"), ] for line in open('tests/ignores.txt'): From ad70497416527c3d882a64f7803e73155f3fa1da Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Tue, 4 Feb 2020 13:30:13 -0300 Subject: [PATCH 022/181] Remove unnecessary parentheses in class definition --- tests/mockserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mockserver.py b/tests/mockserver.py index 08a81418c..30d9bc0e8 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -243,7 +243,7 @@ class MockDNSResolver: return defer.succeed(self._resolve(name)) -class MockDNSServer(): +class MockDNSServer: def __enter__(self): self.proc = Popen([sys.executable, '-u', '-m', 'tests.mockserver', '-t', 'dns'], From a64fa2f0866c10594f1e5cf00a0161f9fea1eb62 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 10 Feb 2020 10:16:05 -0300 Subject: [PATCH 023/181] Keyword arguments when creating a _ResponseReader --- scrapy/core/downloader/handlers/http11.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 49c9eacac..7a1a77b23 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -417,14 +417,14 @@ class ScrapyAgent: d = defer.Deferred(_cancel) txresponse.deliverBody( _ResponseReader( - d, - txresponse, - request, - maxsize, - warnsize, - fail_on_dataloss, - self._crawler, - self._source, + finished=d, + txresponse=txresponse, + request=request, + maxsize=maxsize, + warnsize=warnsize, + fail_on_dataloss=fail_on_dataloss, + crawler=self._crawler, + source=self._source, ) ) From 122ce6d6fb3861d99ba2f2810b2370056bae1190 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 10 Feb 2020 10:20:26 -0300 
Subject: [PATCH 024/181] Check bytes are received in order (bytes_received signal) --- tests/test_engine.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index c83a23b55..0d970928b 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -12,7 +12,6 @@ module with the ``runserver`` argument:: import os import re -import string import sys from collections import defaultdict from urllib.parse import urlparse @@ -91,7 +90,8 @@ def start_test_site(debug=False): r = static.File(root_dir) r.putChild(b"redirect", util.Redirect(b"/redirected")) r.putChild(b"redirected", static.Data(b"Redirected here", "text/plain")) - r.putChild(b"random", static.Data(string.ascii_letters.encode("utf8") * 2**14, "text/plain")) + numbers = [str(x).encode("utf8") for x in range(2**14)] + r.putChild(b"numbers", static.Data(b"".join(numbers), "text/plain")) port = reactor.listenTCP(0, server.Site(r), interface="127.0.0.1") if debug: @@ -124,7 +124,7 @@ class CrawlerRun(object): self.geturl("/"), self.geturl("/redirect"), self.geturl("/redirect"), # duplicate - self.geturl("/random"), + self.geturl("/numbers"), ] for name, signal in vars(signals).items(): @@ -315,8 +315,12 @@ class EngineTest(unittest.TestCase): b" \n" b"\n" ) - elif self.run.getpath(request.url) == "/random": - self.assertTrue(len(data) > 1) # signal was fired multiple times + elif self.run.getpath(request.url) == "/numbers": + # signal was fired multiple times + self.assertTrue(len(data) > 1) + # bytes were received in order + numbers = [str(x).encode("utf8") for x in range(2**14)] + self.assertEqual(joined_data, b"".join(numbers)) def _assert_signals_caught(self): assert signals.engine_started in self.run.signals_caught From 42b4e9b3372ce3f9da57c7512b31a3c455b8a161 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 10 Feb 2020 11:23:38 -0300 Subject: [PATCH 025/181] Reword signal docs --- docs/topics/signals.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/topics/signals.rst b/docs/topics/signals.rst index 3a15bf95c..dfb87cef3 100644 --- a/docs/topics/signals.rst +++ b/docs/topics/signals.rst @@ -279,7 +279,7 @@ request_scheduled Sent when the engine schedules a :class:`~scrapy.http.Request`, to be downloaded later. - The signal does not support returning deferreds from its handlers. + This signal does not support returning deferreds from its handlers. :param request: the request that reached the scheduler :type request: :class:`~scrapy.http.Request` object @@ -296,7 +296,7 @@ request_dropped Sent when a :class:`~scrapy.http.Request`, scheduled by the engine to be downloaded later, is rejected by the scheduler. - The signal does not support returning deferreds from its handlers. + This signal does not support returning deferreds from its handlers. :param request: the request that reached the scheduler :type request: :class:`~scrapy.http.Request` object @@ -312,7 +312,7 @@ request_reached_downloader Sent when a :class:`~scrapy.http.Request` reached downloader. - The signal does not support returning deferreds from its handlers. + This signal does not support returning deferreds from its handlers. 
:param request: the request that reached downloader :type request: :class:`~scrapy.http.Request` object From 13ba9bc629cb0a77ebaca36a10a0a4984d7cce68 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 10 Feb 2020 12:29:39 -0300 Subject: [PATCH 026/181] Note about Response.ip_address --- docs/topics/request-response.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst index 17eb63064..89e570028 100644 --- a/docs/topics/request-response.rst +++ b/docs/topics/request-response.rst @@ -685,6 +685,8 @@ Response objects .. attribute:: Response.ip_address The IP address of the server from which the Response originated. + This attribute is currently only populated by the HTTP 1.1 download + handler, i.e. for ``http(s)`` responses. .. method:: Response.copy() From 037ae5b22e6d6600dc537ee5073652ce74e5f47b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 10 Feb 2020 19:54:47 +0100 Subject: [PATCH 027/181] =?UTF-8?q?Explicitly=20indicate=20None=20as=20ip?= =?UTF-8?q?=5Faddress=E2=80=99s=20default=20value?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/topics/request-response.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst index 89e570028..8f2504a33 100644 --- a/docs/topics/request-response.rst +++ b/docs/topics/request-response.rst @@ -685,8 +685,10 @@ Response objects .. attribute:: Response.ip_address The IP address of the server from which the Response originated. + This attribute is currently only populated by the HTTP 1.1 download - handler, i.e. for ``http(s)`` responses. + handler, i.e. for ``http(s)`` responses. For other handlers, + :attr:`ip_address` is always ``None``. .. 
method:: Response.copy() From f85bf77da3c8943f0791dcae893e8294c4d118d7 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 23 Feb 2020 18:31:13 -0300 Subject: [PATCH 028/181] Restore unrelated change --- scrapy/resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapy/resolver.py b/scrapy/resolver.py index f69894b1e..554a3a14d 100644 --- a/scrapy/resolver.py +++ b/scrapy/resolver.py @@ -29,7 +29,7 @@ class CachingThreadedResolver(ThreadedResolver): cache_size = 0 return cls(reactor, cache_size, crawler.settings.getfloat('DNS_TIMEOUT')) - def install_on_reactor(self): + def install_on_reactor(self,): self.reactor.installResolver(self) def getHostByName(self, name, timeout=None): From 889b4718520220d1a81e702ff754ec210a7d3c79 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 23 Feb 2020 18:40:43 -0300 Subject: [PATCH 029/181] Import changes --- scrapy/core/downloader/handlers/http11.py | 4 ++-- tests/test_crawl.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index e72275021..190ae1d3b 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -1,11 +1,11 @@ """Download handlers for http and https schemes""" +import ipaddress import logging import re import warnings from contextlib import suppress from io import BytesIO -from ipaddress import ip_address from time import time from urllib.parse import urldefrag @@ -468,7 +468,7 @@ class _ResponseReader(protocol.Protocol): self._certificate = ssl.Certificate(self.transport._producer.getPeerCertificate()) if self._ip_address is None: - self._ip_address = ip_address(self.transport._producer.getPeer().host) + self._ip_address = ipaddress.ip_address(self.transport._producer.getPeer().host) def dataReceived(self, bodyBytes): # This maybe called several times after cancel was called with buffered data. 
diff --git a/tests/test_crawl.py b/tests/test_crawl.py index 3a9b00ab3..3c110e7a6 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -441,13 +441,15 @@ with multiples lines self.assertEqual(cert.getIssuer().commonName, b"localhost") @defer.inlineCallbacks - def test_dns_server_ip_address(self): + def test_dns_server_ip_address_none(self): crawler = self.runner.create_crawler(SingleRequestSpider) url = self.mockserver.url('/status?n=200') yield crawler.crawl(seed=url, mockserver=self.mockserver) ip_address = crawler.spider.meta['responses'][0].ip_address self.assertIsNone(ip_address) + @defer.inlineCallbacks + def test_dns_server_ip_address(self): crawler = self.runner.create_crawler(SingleRequestSpider) url = self.mockserver.url('/echo?body=test') expected_netloc, _ = urlparse(url).netloc.split(':') From 91a78eef3ee9de033e66db55c49321b2cc43740e Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 8 Mar 2020 22:32:17 -0300 Subject: [PATCH 030/181] Pass callback results as dicts instead of tuples --- scrapy/core/downloader/handlers/http11.py | 56 ++++++++++++++++------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 190ae1d3b..e904cbc05 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -384,7 +384,13 @@ class ScrapyAgent(object): def _cb_bodyready(self, txresponse, request): # deliverBody hangs for responses without body if txresponse.length == 0: - return txresponse, b'', None, None + return { + "txresponse": txresponse, + "body": b"", + "flags": None, + "certificate": None, + "ip_address": None, + } maxsize = request.meta.get('download_maxsize', self._maxsize) warnsize = request.meta.get('download_warnsize', self._warnsize) @@ -420,12 +426,18 @@ class ScrapyAgent(object): return d def _cb_bodydone(self, result, request, url): - txresponse, body, flags, certificate, ip_address = result - status = int(txresponse.code) - headers = Headers(txresponse.headers.getAllRawHeaders()) - respcls = responsetypes.from_args(headers=headers, url=url, body=body) - return respcls(url=url, status=status, headers=headers, body=body, - flags=flags, certificate=certificate, ip_address=ip_address) + status = int(result["txresponse"].code) + headers = Headers(result["txresponse"].headers.getAllRawHeaders()) + respcls = responsetypes.from_args(headers=headers, url=url, body=result["body"]) + return respcls( + url=url, + status=status, + headers=headers, + body=result["body"], + flags=result["flags"], + certificate=result["certificate"], + ip_address=result["ip_address"], + ) @implementer(IBodyProducer) @@ -501,22 +513,34 @@ class _ResponseReader(protocol.Protocol): body = self._bodybuf.getvalue() if reason.check(ResponseDone): - self._finished.callback( - (self._txresponse, body, None, self._certificate, self._ip_address) - ) + self._finished.callback({ + "txresponse": self._txresponse, + "body": body, + "flags": None, + "certificate": self._certificate, + "ip_address": self._ip_address, + }) return if reason.check(PotentialDataLoss): - self._finished.callback( - (self._txresponse, body, ['partial'], self._certificate, self._ip_address) - ) + self._finished.callback({ + "txresponse": self._txresponse, + "body": body, + "flags": ["partial"], + "certificate": self._certificate, + "ip_address": self._ip_address, + }) return if reason.check(ResponseFailed) and any(r.check(_DataLoss) for r in reason.value.reasons): if not 
self._fail_on_dataloss: - self._finished.callback( - (self._txresponse, body, ['dataloss'], self._certificate, self._ip_address) - ) + self._finished.callback({ + "txresponse": self._txresponse, + "body": body, + "flags": ["dataloss"], + "certificate": self._certificate, + "ip_address": self._ip_address, + }) return elif not self._fail_on_dataloss_warned: From 1785095707dec53647c835c0b0861b220e8495af Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 11 Mar 2020 20:41:59 -0300 Subject: [PATCH 031/181] Remove single-use variable --- scrapy/core/downloader/handlers/http11.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index e904cbc05..a5b03a62b 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -426,12 +426,11 @@ class ScrapyAgent(object): return d def _cb_bodydone(self, result, request, url): - status = int(result["txresponse"].code) headers = Headers(result["txresponse"].headers.getAllRawHeaders()) respcls = responsetypes.from_args(headers=headers, url=url, body=result["body"]) return respcls( url=url, - status=status, + status=int(result["txresponse"].code), headers=headers, body=result["body"], flags=result["flags"], From ca08e04198b94bd9583704f86316b57af3408adc Mon Sep 17 00:00:00 2001 From: Aditya Date: Fri, 20 Mar 2020 02:31:35 +0530 Subject: [PATCH 032/181] [docs] update redirect links python2 -> python3 --- docs/topics/downloader-middleware.rst | 5 ++--- docs/topics/email.rst | 2 +- docs/topics/exporters.rst | 8 ++++---- docs/topics/extensions.rst | 2 +- docs/topics/items.rst | 6 +++--- docs/topics/logging.rst | 16 ++++++++-------- docs/topics/request-response.rst | 10 +++++----- docs/topics/selectors.rst | 2 +- docs/topics/settings.rst | 6 +++--- docs/topics/spider-middleware.rst | 6 +++--- 10 files changed, 31 insertions(+), 32 deletions(-) diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst index 73648994d..61a3806fb 100644 --- a/docs/topics/downloader-middleware.rst +++ b/docs/topics/downloader-middleware.rst @@ -739,7 +739,7 @@ HttpProxyMiddleware This middleware sets the HTTP proxy to use for requests, by setting the ``proxy`` meta value for :class:`~scrapy.http.Request` objects. - Like the Python standard library modules `urllib`_ and `urllib2`_, it obeys + Like the Python standard library module `urllib.request`_, it obeys the following environment variables: * ``http_proxy`` @@ -751,8 +751,7 @@ HttpProxyMiddleware Keep in mind this value will take precedence over ``http_proxy``/``https_proxy`` environment variables, and it will also ignore ``no_proxy`` environment variable. -.. _urllib: https://docs.python.org/2/library/urllib.html -.. _urllib2: https://docs.python.org/2/library/urllib2.html +.. _urllib.request: https://docs.python.org/3/library/urllib.request.html RedirectMiddleware ------------------ diff --git a/docs/topics/email.rst b/docs/topics/email.rst index 72bf52227..aed3deb2e 100644 --- a/docs/topics/email.rst +++ b/docs/topics/email.rst @@ -15,7 +15,7 @@ IO of the crawler. It also provides a simple API for sending attachments and it's very easy to configure, with a few :ref:`settings `. -.. _smtplib: https://docs.python.org/2/library/smtplib.html +.. 
_smtplib: https://docs.python.org/3/library/smtplib.html Quick example ============= diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst index e52682690..4ba8714bd 100644 --- a/docs/topics/exporters.rst +++ b/docs/topics/exporters.rst @@ -320,7 +320,7 @@ CsvItemExporter Color TV,1200 DVD player,200 -.. _csv.writer: https://docs.python.org/2/library/csv.html#csv.writer +.. _csv.writer: https://docs.python.org/3/library/csv.html#csv.writer PickleItemExporter ------------------ @@ -342,7 +342,7 @@ PickleItemExporter Pickle isn't a human readable format, so no output examples are provided. -.. _pickle module documentation: https://docs.python.org/2/library/pickle.html +.. _pickle module documentation: https://docs.python.org/3/library/pickle.html PprintItemExporter ------------------ @@ -393,7 +393,7 @@ JsonItemExporter stream-friendly format, consider using :class:`JsonLinesItemExporter` instead, or splitting the output in multiple chunks. -.. _JSONEncoder: https://docs.python.org/2/library/json.html#json.JSONEncoder +.. _JSONEncoder: https://docs.python.org/3/library/json.html#json.JSONEncoder JsonLinesItemExporter --------------------- @@ -417,7 +417,7 @@ JsonLinesItemExporter Unlike the one produced by :class:`JsonItemExporter`, the format produced by this exporter is well suited for serializing large amounts of data. -.. _JSONEncoder: https://docs.python.org/2/library/json.html#json.JSONEncoder +.. _JSONEncoder: https://docs.python.org/3/library/json.html#json.JSONEncoder MarshalItemExporter ------------------- diff --git a/docs/topics/extensions.rst b/docs/topics/extensions.rst index 94fd2e36e..f57e37e6f 100644 --- a/docs/topics/extensions.rst +++ b/docs/topics/extensions.rst @@ -372,5 +372,5 @@ For more info see `Debugging in Python`_. This extension only works on POSIX-compliant platforms (i.e. not Windows). -.. _Python debugger: https://docs.python.org/2/library/pdb.html +.. _Python debugger: https://docs.python.org/3/library/pdb.html .. _Debugging in Python: https://pythonconquerstheuniverse.wordpress.com/2009/09/10/debugging-in-python/ diff --git a/docs/topics/items.rst b/docs/topics/items.rst index 44643cb67..36731571e 100644 --- a/docs/topics/items.rst +++ b/docs/topics/items.rst @@ -24,7 +24,7 @@ serialization can be customized using Item fields metadata, :mod:`trackref` tracks Item instances to help find memory leaks (see :ref:`topics-leaks-trackrefs`), etc. -.. _dictionary-like: https://docs.python.org/2/library/stdtypes.html#dict +.. _dictionary-like: https://docs.python.org/3/library/stdtypes.html#dict .. _topics-items-declaring: @@ -249,7 +249,7 @@ Item objects :class:`Field` objects used in the :ref:`Item declaration `. -.. _dict API: https://docs.python.org/2/library/stdtypes.html#dict +.. _dict API: https://docs.python.org/3/library/stdtypes.html#dict Field objects ============= @@ -262,7 +262,7 @@ Field objects to support the :ref:`item declaration syntax ` based on class attributes. -.. _dict: https://docs.python.org/2/library/stdtypes.html#dict +.. _dict: https://docs.python.org/3/library/stdtypes.html#dict Other classes related to Item diff --git a/docs/topics/logging.rst b/docs/topics/logging.rst index d4d22d889..a85e1a769 100644 --- a/docs/topics/logging.rst +++ b/docs/topics/logging.rst @@ -83,10 +83,10 @@ path:: .. seealso:: - Module logging, `HowTo `_ + Module logging, `HowTo `_ Basic Logging Tutorial - Module logging, `Loggers `_ + Module logging, `Loggers `_ Further documentation on loggers .. 
_topics-logging-from-spiders: @@ -166,13 +166,13 @@ possible levels listed in :ref:`topics-logging-levels`. :setting:`LOG_FORMAT` and :setting:`LOG_DATEFORMAT` specify formatting strings used as layouts for all messages. Those strings can contain any placeholders listed in `logging's logrecord attributes docs -`_ and +`_ and `datetime's strftime and strptime directives -`_ +`_ respectively. If :setting:`LOG_SHORT_NAMES` is set, then the logs will not display the Scrapy -component that prints the log. It is unset by default, hence logs contain the +component that prints the log. It is unset by default, hence logs contain the Scrapy component responsible for that log output. Command-line options @@ -190,7 +190,7 @@ to override some of the Scrapy settings regarding logging. .. seealso:: - Module `logging.handlers `_ + Module `logging.handlers `_ Further documentation on available handlers .. _custom-log-formats: @@ -201,7 +201,7 @@ Custom Log Formats A custom log format can be set for different actions by extending :class:`~scrapy.logformatter.LogFormatter` class and making :setting:`LOG_FORMATTER` point to your new class. - + .. autoclass:: scrapy.logformatter.LogFormatter :members: @@ -276,6 +276,6 @@ scrapy.utils.log module Refer to :ref:`run-from-script` for more details about using Scrapy this way. -.. _logging.basicConfig(): https://docs.python.org/2/library/logging.html#logging.basicConfig +.. _logging.basicConfig(): https://docs.python.org/3/library/logging.html#logging.basicConfig diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst index b2a60ff39..6c5a08409 100644 --- a/docs/topics/request-response.rst +++ b/docs/topics/request-response.rst @@ -189,7 +189,7 @@ Request objects ``copy()`` or ``replace()`` methods, and can also be accessed, in your spider, from the ``response.cb_kwargs`` attribute. - .. _shallow copied: https://docs.python.org/2/library/copy.html + .. _shallow copied: https://docs.python.org/3/library/copy.html .. method:: Request.copy() @@ -706,7 +706,7 @@ Response objects A :class:`twisted.internet.ssl.Certificate` object representing the server's SSL certificate. - + Only populated for ``https`` responses, ``None`` otherwise. .. method:: Response.copy() @@ -724,17 +724,17 @@ Response objects Constructs an absolute url by combining the Response's :attr:`url` with a possible relative url. - This is a wrapper over `urlparse.urljoin`_, it's merely an alias for + This is a wrapper over `urllib.parse.urljoin`_, it's merely an alias for making this call:: - urlparse.urljoin(response.url, url) + urllib.parse.urljoin(response.url, url) .. automethod:: Response.follow .. automethod:: Response.follow_all -.. _urlparse.urljoin: https://docs.python.org/2/library/urlparse.html#urlparse.urljoin +.. _urllib.parse.urljoin: https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin .. _topics-request-response-ref-response-subclasses: diff --git a/docs/topics/selectors.rst b/docs/topics/selectors.rst index 1f7802c98..0f90b28c0 100644 --- a/docs/topics/selectors.rst +++ b/docs/topics/selectors.rst @@ -36,7 +36,7 @@ defines selectors to associate those styles with specific HTML elements. .. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/ .. _lxml: https://lxml.de/ -.. _ElementTree: https://docs.python.org/2/library/xml.etree.elementtree.html +.. _ElementTree: https://docs.python.org/3/library/xml.etree.elementtree.html .. _XPath: https://www.w3.org/TR/xpath/all/ .. _CSS: https://www.w3.org/TR/selectors .. 
_parsel: https://parsel.readthedocs.io/en/latest/ diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index dc6843d75..d78a6253e 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -28,7 +28,7 @@ The value of ``SCRAPY_SETTINGS_MODULE`` should be in Python path syntax, e.g. ``myproject.settings``. Note that the settings module should be on the Python `import search path`_. -.. _import search path: https://docs.python.org/2/tutorial/modules.html#the-module-search-path +.. _import search path: https://docs.python.org/3/tutorial/modules.html#the-module-search-path .. _populating-settings: @@ -902,7 +902,7 @@ Default: ``'%(asctime)s [%(name)s] %(levelname)s: %(message)s'`` String for formatting log messages. Refer to the `Python logging documentation`_ for the whole list of available placeholders. -.. _Python logging documentation: https://docs.python.org/2/library/logging.html#logrecord-attributes +.. _Python logging documentation: https://docs.python.org/3/library/logging.html#logrecord-attributes .. setting:: LOG_DATEFORMAT @@ -915,7 +915,7 @@ String for formatting date/time, expansion of the ``%(asctime)s`` placeholder in :setting:`LOG_FORMAT`. Refer to the `Python datetime documentation`_ for the whole list of available directives. -.. _Python datetime documentation: https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior +.. _Python datetime documentation: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior .. setting:: LOG_FORMATTER diff --git a/docs/topics/spider-middleware.rst b/docs/topics/spider-middleware.rst index 0e8210130..3d7450c86 100644 --- a/docs/topics/spider-middleware.rst +++ b/docs/topics/spider-middleware.rst @@ -173,18 +173,18 @@ object gives you access, for example, to the :ref:`settings `. :type spider: :class:`~scrapy.spiders.Spider` object .. method:: from_crawler(cls, crawler) - + If present, this classmethod is called to create a middleware instance from a :class:`~scrapy.crawler.Crawler`. It must return a new instance of the middleware. Crawler object provides access to all Scrapy core components like settings and signals; it is a way for middleware to access them and hook its functionality into Scrapy. - + :param crawler: crawler that uses this middleware :type crawler: :class:`~scrapy.crawler.Crawler` object -.. _Exception: https://docs.python.org/2/library/exceptions.html#exceptions.Exception +.. _Exception: https://docs.python.org/3/library/exceptions.html#Exception .. 
_topics-spider-middleware-ref: From f37b1bdc5616f67460c645e26c49f9d5b34e3631 Mon Sep 17 00:00:00 2001 From: Aditya Date: Fri, 20 Mar 2020 05:22:51 +0530 Subject: [PATCH 033/181] [docs] update redirect links to python3 --- docs/intro/tutorial.rst | 10 +++++----- docs/topics/contracts.rst | 4 +--- docs/topics/downloader-middleware.rst | 11 +++-------- docs/topics/dynamic-content.rst | 10 ++++------ docs/topics/email.rst | 4 +--- docs/topics/exporters.rst | 20 ++++++-------------- docs/topics/extensions.rst | 3 +-- docs/topics/items.rst | 21 ++++++--------------- docs/topics/logging.rst | 15 +++++---------- docs/topics/request-response.rst | 8 ++------ docs/topics/selectors.rst | 3 +-- docs/topics/spider-middleware.rst | 6 +----- docs/topics/spiders.rst | 4 +--- docs/topics/telnetconsole.rst | 11 ++++------- scrapy/item.py | 4 +--- 15 files changed, 42 insertions(+), 92 deletions(-) diff --git a/docs/intro/tutorial.rst b/docs/intro/tutorial.rst index 1768badbb..ab6fd4829 100644 --- a/docs/intro/tutorial.rst +++ b/docs/intro/tutorial.rst @@ -25,16 +25,16 @@ Scrapy. If you're already familiar with other languages, and want to learn Python quickly, the `Python Tutorial`_ is a good resource. If you're new to programming and want to start with Python, the following books -may be useful to you: +may be useful to you: * `Automate the Boring Stuff With Python`_ -* `How To Think Like a Computer Scientist`_ +* `How To Think Like a Computer Scientist`_ -* `Learn Python 3 The Hard Way`_ +* `Learn Python 3 The Hard Way`_ You can also take a look at `this list of Python resources for non-programmers`_, -as well as the `suggested resources in the learnpython-subreddit`_. +as well as the `suggested resources in the learnpython-subreddit`_. .. _Python: https://www.python.org/ .. _this list of Python resources for non-programmers: https://wiki.python.org/moin/BeginnersGuide/NonProgrammers @@ -62,7 +62,7 @@ This will create a ``tutorial`` directory with the following contents:: __init__.py items.py # project items definition file - + middlewares.py # project middlewares file pipelines.py # project pipelines file diff --git a/docs/topics/contracts.rst b/docs/topics/contracts.rst index 43db8f101..319f577bc 100644 --- a/docs/topics/contracts.rst +++ b/docs/topics/contracts.rst @@ -136,7 +136,7 @@ Detecting check runs ==================== When ``scrapy check`` is running, the ``SCRAPY_CHECK`` environment variable is -set to the ``true`` string. You can use `os.environ`_ to perform any change to +set to the ``true`` string. You can use :data:`os.environ` to perform any change to your spiders or your settings when ``scrapy check`` is used:: import os @@ -148,5 +148,3 @@ your spiders or your settings when ``scrapy check`` is used:: def __init__(self): if os.environ.get('SCRAPY_CHECK'): pass # Do some scraper adjustments when a check is running - -.. _os.environ: https://docs.python.org/3/library/os.html#os.environ diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst index 61a3806fb..d7ec53bfa 100644 --- a/docs/topics/downloader-middleware.rst +++ b/docs/topics/downloader-middleware.rst @@ -739,7 +739,7 @@ HttpProxyMiddleware This middleware sets the HTTP proxy to use for requests, by setting the ``proxy`` meta value for :class:`~scrapy.http.Request` objects. 
- Like the Python standard library module `urllib.request`_, it obeys + Like the Python standard library module :mod:`urllib.request`, it obeys the following environment variables: * ``http_proxy`` @@ -751,8 +751,6 @@ HttpProxyMiddleware Keep in mind this value will take precedence over ``http_proxy``/``https_proxy`` environment variables, and it will also ignore ``no_proxy`` environment variable. -.. _urllib.request: https://docs.python.org/3/library/urllib.request.html - RedirectMiddleware ------------------ @@ -982,7 +980,7 @@ RobotsTxtMiddleware Scrapy ships with support for the following robots.txt_ parsers: * :ref:`Protego ` (default) - * :ref:`RobotFileParser ` + * :class:`~urllib.robotparser.RobotFileParser` * :ref:`Reppy ` * :ref:`Robotexclusionrulesparser ` @@ -1030,13 +1028,10 @@ Based on `Protego `_: Scrapy uses this parser by default. -.. _python-robotfileparser: - RobotFileParser ~~~~~~~~~~~~~~~ -Based on `RobotFileParser -`_: +Based on :class:`~urllib.robotparser.RobotFileParser`: * is Python's built-in robots.txt_ parser diff --git a/docs/topics/dynamic-content.rst b/docs/topics/dynamic-content.rst index b98133676..22bcac268 100644 --- a/docs/topics/dynamic-content.rst +++ b/docs/topics/dynamic-content.rst @@ -115,7 +115,7 @@ data from it depends on the type of response: - If the response is HTML or XML, use :ref:`selectors ` as usual. -- If the response is JSON, use `json.loads`_ to load the desired data from +- If the response is JSON, use :func:`json.loads` to load the desired data from :attr:`response.text `:: data = json.loads(response.text) @@ -130,7 +130,7 @@ data from it depends on the type of response: - If the response is JavaScript, or HTML with a ``