Mirror of https://github.com/scrapy/scrapy.git (synced 2025-03-14 16:58:20 +00:00)

Merge branch 'master' into azure-pipelines

This commit is contained in commit 6e58da1dcd.
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.0.0
current_version = 2.2.0
commit = True
tag = True
tag_name = {new_version}
.gitignore (vendored, 1 change)

@@ -15,6 +15,7 @@ htmlcov/
.pytest_cache/
.coverage.*
.cache/
.mypy_cache/

# Windows
Thumbs.db
@@ -1,4 +1,5 @@
version: 2
formats: all
sphinx:
  configuration: docs/conf.py
  fail_on_warning: true
.travis.yml (44 changes)

@@ -11,25 +11,35 @@ matrix:
      python: 3.8
    - env: TOXENV=flake8
      python: 3.8
    - env: TOXENV=pypy3
    - env: TOXENV=py35
      python: 3.5
    - env: TOXENV=pinned
      python: 3.5
    - env: TOXENV=py35-asyncio
      python: 3.5.2
    - env: TOXENV=py36
      python: 3.6
    - env: TOXENV=py37
      python: 3.7
    - env: TOXENV=py38
      python: 3.8
    - env: TOXENV=extra-deps
      python: 3.8
    - env: TOXENV=py38-asyncio
    - env: TOXENV=pylint
      python: 3.8
    - env: TOXENV=docs
      python: 3.7 # Keep in sync with .readthedocs.yml
    - env: TOXENV=typing
      python: 3.8

    - env: TOXENV=pypy3
    - env: TOXENV=pinned
      python: 3.5.2
    - env: TOXENV=asyncio
      python: 3.5.2 # We use additional code to support 3.5.3 and earlier
    - env: TOXENV=py
      python: 3.5
    - env: TOXENV=asyncio
      python: 3.5 # We use specific code to support >= 3.5.4, < 3.6
    - env: TOXENV=py
      python: 3.6
    - env: TOXENV=py
      python: 3.7
    - env: TOXENV=py PYPI_RELEASE_JOB=true
      python: 3.8
      dist: bionic
    - env: TOXENV=extra-deps
      python: 3.8
      dist: bionic
    - env: TOXENV=asyncio
      python: 3.8
      dist: bionic
install:
- |
  if [ "$TOXENV" = "pypy3" ]; then

@@ -62,4 +72,4 @@ deploy:
  on:
    tags: true
    repo: scrapy/scrapy
    condition: "$TOXENV == py37 && $TRAVIS_TAG =~ ^[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$"
    condition: "$PYPI_RELEASE_JOB == true && $TRAVIS_TAG =~ ^[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$"
@@ -40,7 +40,7 @@ including a list of features.

Requirements
============

* Python 3.5+
* Python 3.5.2+
* Works on Linux, Windows, macOS, BSD

Install
@@ -12,6 +12,8 @@ collect_ignore = [
    "scrapy/utils/testsite.py",
    # contains scripts to be run by tests/test_crawler.py::CrawlerProcessSubprocess
    *_py_files("tests/CrawlerProcess"),
    # contains scripts to be run by tests/test_crawler.py::CrawlerRunnerSubprocess
    *_py_files("tests/CrawlerRunner"),
    # Py36-only parts of respective tests
    *_py_files("tests/py36"),
]
@@ -57,3 +57,12 @@ There is a way to recreate the doc automatically when you make changes, you
need to install watchdog (``pip install watchdog``) and then use::

    make watch

Alternative method using tox
----------------------------

To compile the documentation to HTML run the following command::

    tox -e docs

Documentation will be generated (in HTML format) inside the ``.tox/docs/tmp/html`` dir.
docs/conf.py (14 changes)

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Scrapy documentation build configuration file, created by
# sphinx-quickstart on Mon Nov 24 12:02:52 2008.
#

@@ -102,6 +100,9 @@ exclude_trees = ['.build']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# List of Sphinx warnings that will not be raised
suppress_warnings = ['epub.unknown_project_files']


# Options for HTML output
# -----------------------

@@ -280,6 +281,7 @@ coverage_ignore_pyobjects = [
# -------------------------------------

intersphinx_mapping = {
    'attrs': ('https://www.attrs.org/en/stable/', None),
    'coverage': ('https://coverage.readthedocs.io/en/stable', None),
    'cssselect': ('https://cssselect.readthedocs.io/en/latest', None),
    'pytest': ('https://docs.pytest.org/en/latest', None),

@@ -295,3 +297,11 @@ intersphinx_mapping = {
# ------------------------------------

hoverxref_auto_ref = True
hoverxref_role_types = {
    "class": "tooltip",
    "confval": "tooltip",
    "hoverxref": "tooltip",
    "mod": "tooltip",
    "ref": "tooltip",
}
hoverxref_roles = ['command', 'reqmeta', 'setting', 'signal']
@@ -155,6 +155,9 @@ Finally, try to keep aesthetic changes (:pep:`8` compliance, unused imports
removal, etc) in separate commits from functional changes. This will make pull
requests easier to review and more likely to get merged.


.. _coding-style:

Coding style
============

@@ -163,7 +166,7 @@ Scrapy:

* Unless otherwise specified, follow :pep:`8`.

* It's OK to use lines longer than 80 chars if it improves the code
* It's OK to use lines longer than 79 chars if it improves the code
  readability.

* Don't put your name in the code you contribute; git provides enough
docs/faq.rst (23 changes)

@@ -69,7 +69,7 @@ Here's an example spider using BeautifulSoup API, with ``lxml`` as the HTML pars

What Python versions does Scrapy support?
-----------------------------------------

Scrapy is supported under Python 3.5+
Scrapy is supported under Python 3.5.2+
under CPython (default Python implementation) and PyPy (starting with PyPy 5.9).
Python 3 support was added in Scrapy 1.1.
PyPy support was added in Scrapy 1.4, PyPy3 support was added in Scrapy 1.5.

@@ -342,15 +342,15 @@ method for this purpose. For example::

    from copy import deepcopy

    from scrapy.item import BaseItem

    from itemadapter import is_item, ItemAdapter

    class MultiplyItemsMiddleware:

        def process_spider_output(self, response, result, spider):
            for item in result:
                if isinstance(item, (BaseItem, dict)):
                    for _ in range(item['multiply_by']):
                if is_item(item):
                    adapter = ItemAdapter(item)
                    for _ in range(adapter['multiply_by']):
                        yield deepcopy(item)

Does Scrapy support IPv6 addresses?

@@ -371,6 +371,19 @@ Twisted reactor is :class:`twisted.internet.selectreactor.SelectReactor`. Switch
different reactor is possible by using the :setting:`TWISTED_REACTOR` setting.
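
For illustration, a minimal ``settings.py`` sketch; the value below is the
asyncio-based reactor bundled with Twisted, adjust it to the reactor you
actually want to use::

    # settings.py
    TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"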


.. _faq-stop-response-download:

How can I cancel the download of a given response?
--------------------------------------------------

In some situations, it might be useful to stop the download of a certain response.
For instance, if you only need the first part of a large response and you would like
to save resources by avoiding the download of the whole body.
In that case, you could attach a handler to the :class:`~scrapy.signals.bytes_received`
signal and raise a :exc:`~scrapy.exceptions.StopDownload` exception. Please refer to
the :ref:`topics-stop-response-download` topic for additional information and examples.
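
As an illustrative sketch only (the spider name and URL are placeholders), a
handler connected to ``bytes_received`` could look like this::

    import scrapy
    from scrapy.exceptions import StopDownload


    class FirstChunkSpider(scrapy.Spider):
        name = "first_chunk"
        start_urls = ["https://example.com/very-large-page"]

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            # Connect the handler that stops the download early.
            crawler.signals.connect(spider.on_bytes_received,
                                    signal=scrapy.signals.bytes_received)
            return spider

        def on_bytes_received(self, data, request, spider):
            # Keep only the bytes received so far; with fail=False the
            # truncated response is passed to the regular callback.
            raise StopDownload(fail=False)

        def parse(self, response):
            self.logger.info("Received %d bytes", len(response.body))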

.. _has been reported: https://github.com/scrapy/scrapy/issues/2905
.. _user agents: https://en.wikipedia.org/wiki/User_agent
.. _LIFO: https://en.wikipedia.org/wiki/Stack_(abstract_data_type)
@@ -7,7 +7,7 @@ Installation guide

Installing Scrapy
=================

Scrapy runs on Python 3.5 or above under CPython (default Python
Scrapy runs on Python 3.5.2 or above under CPython (default Python
implementation) and PyPy (starting with PyPy 5.9).

If you're using `Anaconda`_ or `Miniconda`_, you can install the package from
@@ -287,8 +287,8 @@ to be scraped, you can at least get **some** data.

Besides the :meth:`~scrapy.selector.SelectorList.getall` and
:meth:`~scrapy.selector.SelectorList.get` methods, you can also use
the :meth:`~scrapy.selector.SelectorList.re` method to extract using `regular
expressions`_:
the :meth:`~scrapy.selector.SelectorList.re` method to extract using
:doc:`regular expressions <library/re>`:

>>> response.css('title::text').re(r'Quotes.*')
['Quotes to Scrape']

@@ -305,7 +305,6 @@ with a selector (see :ref:`topics-developer-tools`).

`Selector Gadget`_ is also a nice tool to quickly find CSS selector for
visually selected elements, which works in many browsers.

.. _regular expressions: https://docs.python.org/3/library/re.html
.. _Selector Gadget: https://selectorgadget.com/
docs/news.rst (342 changes)

@@ -3,6 +3,348 @@

Release notes
=============

.. _release-2.2.0:

Scrapy 2.2.0 (2020-06-24)
-------------------------

Highlights:

* Python 3.5.2+ is required now
* :ref:`dataclass objects <dataclass-items>` and
  :ref:`attrs objects <attrs-items>` are now valid :ref:`item types
  <item-types>`
* New :meth:`TextResponse.json <scrapy.http.TextResponse.json>` method
* New :signal:`bytes_received` signal that allows canceling response download
* :class:`~scrapy.downloadermiddlewares.cookies.CookiesMiddleware` fixes

Backward-incompatible changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Support for Python 3.5.0 and 3.5.1 has been dropped; Scrapy now refuses to
  run with a Python version lower than 3.5.2, which introduced
  :class:`typing.Type` (:issue:`4615`)


Deprecations
~~~~~~~~~~~~

* :meth:`TextResponse.body_as_unicode
  <scrapy.http.TextResponse.body_as_unicode>` is now deprecated, use
  :attr:`TextResponse.text <scrapy.http.TextResponse.text>` instead
  (:issue:`4546`, :issue:`4555`, :issue:`4579`)

* :class:`scrapy.item.BaseItem` is now deprecated, use
  :class:`scrapy.item.Item` instead (:issue:`4534`)


New features
~~~~~~~~~~~~

* :ref:`dataclass objects <dataclass-items>` and
  :ref:`attrs objects <attrs-items>` are now valid :ref:`item types
  <item-types>`, and a new itemadapter_ library makes it easy to
  write code that :ref:`supports any item type <supporting-item-types>`
  (:issue:`2749`, :issue:`2807`, :issue:`3761`, :issue:`3881`, :issue:`4642`)

* A new :meth:`TextResponse.json <scrapy.http.TextResponse.json>` method
  allows to deserialize JSON responses (:issue:`2444`, :issue:`4460`,
  :issue:`4574`)

* A new :signal:`bytes_received` signal allows monitoring response download
  progress and :ref:`stopping downloads <topics-stop-response-download>`
  (:issue:`4205`, :issue:`4559`)

* The dictionaries in the result list of a :ref:`media pipeline
  <topics-media-pipeline>` now include a new key, ``status``, which indicates
  if the file was downloaded or, if the file was not downloaded, why it was
  not downloaded; see :meth:`FilesPipeline.get_media_requests
  <scrapy.pipelines.files.FilesPipeline.get_media_requests>` for more
  information (:issue:`2893`, :issue:`4486`)

* When using :ref:`Google Cloud Storage <media-pipeline-gcs>` for
  a :ref:`media pipeline <topics-media-pipeline>`, a warning is now logged if
  the configured credentials do not grant the required permissions
  (:issue:`4346`, :issue:`4508`)

* :ref:`Link extractors <topics-link-extractors>` are now serializable,
  as long as you do not use :ref:`lambdas <lambda>` for parameters; for
  example, you can now pass link extractors in :attr:`Request.cb_kwargs
  <scrapy.http.Request.cb_kwargs>` or
  :attr:`Request.meta <scrapy.http.Request.meta>` when :ref:`persisting
  scheduled requests <topics-jobs>` (:issue:`4554`)

* Upgraded the :ref:`pickle protocol <pickle-protocols>` that Scrapy uses
  from protocol 2 to protocol 4, improving serialization capabilities and
  performance (:issue:`4135`, :issue:`4541`)

* :func:`scrapy.utils.misc.create_instance` now raises a :exc:`TypeError`
  exception if the resulting instance is ``None`` (:issue:`4528`,
  :issue:`4532`)

.. _itemadapter: https://github.com/scrapy/itemadapter


Bug fixes
~~~~~~~~~

* :class:`~scrapy.downloadermiddlewares.cookies.CookiesMiddleware` no longer
  discards cookies defined in :attr:`Request.headers
  <scrapy.http.Request.headers>` (:issue:`1992`, :issue:`2400`)

* :class:`~scrapy.downloadermiddlewares.cookies.CookiesMiddleware` no longer
  re-encodes cookies defined as :class:`bytes` in the ``cookies`` parameter
  of the ``__init__`` method of :class:`~scrapy.http.Request`
  (:issue:`2400`, :issue:`3575`)

* When :setting:`FEEDS` defines multiple URIs, :setting:`FEED_STORE_EMPTY` is
  ``False`` and the crawl yields no items, Scrapy no longer stops feed
  exports after the first URI (:issue:`4621`, :issue:`4626`)

* :class:`~scrapy.spiders.Spider` callbacks defined using :doc:`coroutine
  syntax <topics/coroutines>` no longer need to return an iterable, and may
  instead return a :class:`~scrapy.http.Request` object, an
  :ref:`item <topics-items>`, or ``None`` (:issue:`4609`)

* The :command:`startproject` command now ensures that the generated project
  folders and files have the right permissions (:issue:`4604`)

* Fix a :exc:`KeyError` exception being sometimes raised from
  :class:`scrapy.utils.datatypes.LocalWeakReferencedCache` (:issue:`4597`,
  :issue:`4599`)

* When :setting:`FEEDS` defines multiple URIs, log messages about items being
  stored now contain information from the corresponding feed, instead of
  always containing information about only one of the feeds (:issue:`4619`,
  :issue:`4629`)


Documentation
~~~~~~~~~~~~~

* Added a new section about :ref:`accessing cb_kwargs from errbacks
  <errback-cb_kwargs>` (:issue:`4598`, :issue:`4634`)

* Covered chompjs_ in :ref:`topics-parsing-javascript` (:issue:`4556`,
  :issue:`4562`)

* Removed from :doc:`topics/coroutines` the warning about the API being
  experimental (:issue:`4511`, :issue:`4513`)

* Removed references to unsupported versions of :doc:`Twisted
  <twisted:index>` (:issue:`4533`)

* Updated the description of the :ref:`screenshot pipeline example
  <ScreenshotPipeline>`, which now uses :doc:`coroutine syntax
  <topics/coroutines>` instead of returning a
  :class:`~twisted.internet.defer.Deferred` (:issue:`4514`, :issue:`4593`)

* Removed a misleading import line from the
  :func:`scrapy.utils.log.configure_logging` code example (:issue:`4510`,
  :issue:`4587`)

* The display-on-hover behavior of internal documentation references now also
  covers links to :ref:`commands <topics-commands>`, :attr:`Request.meta
  <scrapy.http.Request.meta>` keys, :ref:`settings <topics-settings>` and
  :ref:`signals <topics-signals>` (:issue:`4495`, :issue:`4563`)

* It is again possible to download the documentation for offline reading
  (:issue:`4578`, :issue:`4585`)

* Removed backslashes preceding ``*args`` and ``**kwargs`` in some function
  and method signatures (:issue:`4592`, :issue:`4596`)

.. _chompjs: https://github.com/Nykakin/chompjs


Quality assurance
~~~~~~~~~~~~~~~~~

* Adjusted the code base further to our :ref:`style guidelines
  <coding-style>` (:issue:`4237`, :issue:`4525`, :issue:`4538`,
  :issue:`4539`, :issue:`4540`, :issue:`4542`, :issue:`4543`, :issue:`4544`,
  :issue:`4545`, :issue:`4557`, :issue:`4558`, :issue:`4566`, :issue:`4568`,
  :issue:`4572`)

* Removed remnants of Python 2 support (:issue:`4550`, :issue:`4553`,
  :issue:`4568`)

* Improved code sharing between the :command:`crawl` and :command:`runspider`
  commands (:issue:`4548`, :issue:`4552`)

* Replaced ``chain(*iterable)`` with ``chain.from_iterable(iterable)``
  (:issue:`4635`)

* You may now run the :mod:`asyncio` tests with Tox on any Python version
  (:issue:`4521`)

* Updated test requirements to reflect an incompatibility with pytest 5.4 and
  5.4.1 (:issue:`4588`)

* Improved :class:`~scrapy.spiderloader.SpiderLoader` test coverage for
  scenarios involving duplicate spider names (:issue:`4549`, :issue:`4560`)

* Configured Travis CI to also run the tests with Python 3.5.2
  (:issue:`4518`, :issue:`4615`)

* Added a `Pylint <https://www.pylint.org/>`_ job to Travis CI
  (:issue:`3727`)

* Added a `Mypy <http://mypy-lang.org/>`_ job to Travis CI (:issue:`4637`)

* Made use of set literals in tests (:issue:`4573`)

* Cleaned up the Travis CI configuration (:issue:`4517`, :issue:`4519`,
  :issue:`4522`, :issue:`4537`)


.. _release-2.1.0:

Scrapy 2.1.0 (2020-04-24)
-------------------------

Highlights:

* New :setting:`FEEDS` setting to export to multiple feeds
* New :attr:`Response.ip_address <scrapy.http.Response.ip_address>` attribute

Backward-incompatible changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* :exc:`AssertionError` exceptions triggered by :ref:`assert <assert>`
  statements have been replaced by new exception types, to support running
  Python in optimized mode (see :option:`-O`) without changing Scrapy’s
  behavior in any unexpected ways.

  If you catch an :exc:`AssertionError` exception from Scrapy, update your
  code to catch the corresponding new exception.

  (:issue:`4440`)


Deprecation removals
~~~~~~~~~~~~~~~~~~~~

* The ``LOG_UNSERIALIZABLE_REQUESTS`` setting is no longer supported, use
  :setting:`SCHEDULER_DEBUG` instead (:issue:`4385`)

* The ``REDIRECT_MAX_METAREFRESH_DELAY`` setting is no longer supported, use
  :setting:`METAREFRESH_MAXDELAY` instead (:issue:`4385`)

* The :class:`~scrapy.downloadermiddlewares.chunked.ChunkedTransferMiddleware`
  middleware has been removed, including the entire
  :class:`scrapy.downloadermiddlewares.chunked` module; chunked transfers
  work out of the box (:issue:`4431`)

* The ``spiders`` property has been removed from
  :class:`~scrapy.crawler.Crawler`, use :class:`CrawlerRunner.spider_loader
  <scrapy.crawler.CrawlerRunner.spider_loader>` or instantiate
  :setting:`SPIDER_LOADER_CLASS` with your settings instead (:issue:`4398`)

* The ``MultiValueDict``, ``MultiValueDictKeyError``, and ``SiteNode``
  classes have been removed from :mod:`scrapy.utils.datatypes`
  (:issue:`4400`)


Deprecations
~~~~~~~~~~~~

* The ``FEED_FORMAT`` and ``FEED_URI`` settings have been deprecated in
  favor of the new :setting:`FEEDS` setting (:issue:`1336`, :issue:`3858`,
  :issue:`4507`)


New features
~~~~~~~~~~~~

* A new setting, :setting:`FEEDS`, allows configuring multiple output feeds
  with different settings each (:issue:`1336`, :issue:`3858`, :issue:`4507`)

* The :command:`crawl` and :command:`runspider` commands now support multiple
  ``-o`` parameters (:issue:`1336`, :issue:`3858`, :issue:`4507`)

* The :command:`crawl` and :command:`runspider` commands now support
  specifying an output format by appending ``:<format>`` to the output file
  (:issue:`1336`, :issue:`3858`, :issue:`4507`)

* The new :attr:`Response.ip_address <scrapy.http.Response.ip_address>`
  attribute gives access to the IP address that originated a response
  (:issue:`3903`, :issue:`3940`)

* A warning is now issued when a value in
  :attr:`~scrapy.spiders.Spider.allowed_domains` includes a port
  (:issue:`50`, :issue:`3198`, :issue:`4413`)

* Zsh completion now excludes used option aliases from the completion list
  (:issue:`4438`)


Bug fixes
~~~~~~~~~

* :ref:`Request serialization <request-serialization>` no longer breaks for
  callbacks that are spider attributes which are assigned a function with a
  different name (:issue:`4500`)

* ``None`` values in :attr:`~scrapy.spiders.Spider.allowed_domains` no longer
  cause a :exc:`TypeError` exception (:issue:`4410`)

* Zsh completion no longer allows options after arguments (:issue:`4438`)

* zope.interface 5.0.0 and later versions are now supported
  (:issue:`4447`, :issue:`4448`)

* :meth:`Spider.make_requests_from_url
  <scrapy.spiders.Spider.make_requests_from_url>`, deprecated in Scrapy
  1.4.0, now issues a warning when used (:issue:`4412`)


Documentation
~~~~~~~~~~~~~

* Improved the documentation about signals that allow their handlers to
  return a :class:`~twisted.internet.defer.Deferred` (:issue:`4295`,
  :issue:`4390`)

* Our PyPI entry now includes links for our documentation, our source code
  repository and our issue tracker (:issue:`4456`)

* Covered the `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_
  service in the documentation (:issue:`4206`, :issue:`4455`)

* Removed references to the Guppy library, which only works in Python 2
  (:issue:`4285`, :issue:`4343`)

* Extended use of InterSphinx to link to Python 3 documentation
  (:issue:`4444`, :issue:`4445`)

* Added support for Sphinx 3.0 and later (:issue:`4475`, :issue:`4480`,
  :issue:`4496`, :issue:`4503`)


Quality assurance
~~~~~~~~~~~~~~~~~

* Removed warnings about using old, removed settings (:issue:`4404`)

* Removed a warning about importing
  :class:`~twisted.internet.testing.StringTransport` from
  ``twisted.test.proto_helpers`` in Twisted 19.7.0 or newer (:issue:`4409`)

* Removed outdated Debian package build files (:issue:`4384`)

* Removed :class:`object` usage as a base class (:issue:`4430`)

* Removed code that added support for old versions of Twisted that we no
  longer support (:issue:`4472`)

* Fixed code style issues (:issue:`4468`, :issue:`4469`, :issue:`4471`,
  :issue:`4481`)

* Removed :func:`twisted.internet.defer.returnValue` calls (:issue:`4443`,
  :issue:`4446`, :issue:`4489`)


.. _release-2.0.1:

Scrapy 2.0.1 (2020-03-18)
@@ -1,4 +1,4 @@
Sphinx>=2.1
sphinx-hoverxref
sphinx-notfound-page
sphinx_rtd_theme
Sphinx>=3.0
sphinx-hoverxref>=0.2b1
sphinx-notfound-page>=0.4
sphinx_rtd_theme>=0.4
@@ -91,7 +91,7 @@ how you :ref:`configure the downloader middlewares
provided while constructing the crawler, and it is created after the
arguments given in the :meth:`crawl` method.

.. method:: crawl(\*args, \**kwargs)
.. method:: crawl(*args, **kwargs)

    Starts the crawler by instantiating its spider class with the given
    ``args`` and ``kwargs`` arguments, while setting the execution engine in

@@ -104,7 +104,7 @@ Spiders
-------

Spiders are custom classes written by Scrapy users to parse responses and
extract items (aka scraped items) from them or additional requests to
extract :ref:`items <topics-items>` from them or additional requests to
follow. For more information see :ref:`topics-spiders`.

.. _component-pipelines:
@@ -78,7 +78,7 @@ override three methods:

.. module:: scrapy.contracts

.. class:: Contract(method, \*args)
.. class:: Contract(method, *args)

    :param method: callback function to which the contract is associated
    :type method: function

@@ -136,7 +136,7 @@ Detecting check runs
====================

When ``scrapy check`` is running, the ``SCRAPY_CHECK`` environment variable is
set to the ``true`` string. You can use `os.environ`_ to perform any change to
set to the ``true`` string. You can use :data:`os.environ` to perform any change to
your spiders or your settings when ``scrapy check`` is used::

    import os

@@ -148,5 +148,3 @@ your spiders or your settings when ``scrapy check`` is used::

        def __init__(self):
            if os.environ.get('SCRAPY_CHECK'):
                pass # Do some scraper adjustments when a check is running

.. _os.environ: https://docs.python.org/3/library/os.html#os.environ
@@ -7,10 +7,6 @@ Coroutines

Scrapy has :ref:`partial support <coroutine-support>` for the
:ref:`coroutine syntax <async>`.

.. warning:: :mod:`asyncio` support in Scrapy is experimental. Future Scrapy
    versions may introduce related API and behavior changes without a
    deprecation period or warning.

.. _coroutine-support:

Supported callables

@@ -57,27 +53,34 @@ There are several use cases for coroutines in Scrapy. Code that would
return Deferreds when written for previous Scrapy versions, such as downloader
middlewares and signal handlers, can be rewritten to be shorter and cleaner::

    from itemadapter import ItemAdapter

    class DbPipeline:
        def _update_item(self, data, item):
            item['field'] = data
            adapter = ItemAdapter(item)
            adapter['field'] = data
            return item

        def process_item(self, item, spider):
            dfd = db.get_some_data(item['id'])
            adapter = ItemAdapter(item)
            dfd = db.get_some_data(adapter['id'])
            dfd.addCallback(self._update_item, item)
            return dfd

becomes::

    from itemadapter import ItemAdapter

    class DbPipeline:
        async def process_item(self, item, spider):
            item['field'] = await db.get_some_data(item['id'])
            adapter = ItemAdapter(item)
            adapter['field'] = await db.get_some_data(adapter['id'])
            return item

Coroutines may be used to call asynchronous code. This includes other
coroutines, functions that return Deferreds and functions that return
`awaitable objects`_ such as :class:`~asyncio.Future`. This means you can use
many useful Python libraries providing such code::
:term:`awaitable objects <awaitable>` such as :class:`~asyncio.Future`.
This means you can use many useful Python libraries providing such code::

    class MySpider(Spider):
        # ...

@@ -107,4 +110,3 @@ Common use cases for asynchronous code include:
:ref:`the screenshot pipeline example<ScreenshotPipeline>`).

.. _aio-libs: https://github.com/aio-libs
.. _awaitable objects: https://docs.python.org/3/glossary.html#term-awaitable
@@ -292,6 +292,9 @@ Alternatively, if you want to know the arguments needed to recreate that
request you can use the :func:`scrapy.utils.curl.curl_to_request_kwargs`
function to get a dictionary with the equivalent arguments.

Note that to translate a cURL command into a Scrapy request,
you may use `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_.
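
For instance, a rough sketch of turning a copied cURL command into a request
(the URL and header below are made-up placeholders)::

    from scrapy import Request
    from scrapy.utils.curl import curl_to_request_kwargs

    curl_command = "curl 'https://example.com/api/items?page=2' -H 'Accept: application/json'"
    request = Request(**curl_to_request_kwargs(curl_command))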

As you can see, with a few inspections in the `Network`-tool we
were able to easily replicate the dynamic requests of the scrolling
functionality of the page. Crawling dynamic pages can be quite
@@ -202,6 +202,11 @@ CookiesMiddleware
sends them back on subsequent requests (from that spider), just like web
browsers do.

.. caution:: When non-UTF8 encoded byte sequences are passed to a
    :class:`~scrapy.http.Request`, the ``CookiesMiddleware`` will log
    a warning. Refer to :ref:`topics-logging-advanced-customization`
    to customize the logging behaviour.
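
As a small usage sketch (the domain and cookie values are placeholders only),
cookies set on a request are tracked by this middleware and sent back on
later requests from the same spider::

    import scrapy


    class CurrencySpider(scrapy.Spider):
        name = "currency"

        def start_requests(self):
            # Initial cookies for the session; the middleware keeps them in
            # its cookie jar for subsequent requests.
            yield scrapy.Request(
                "https://www.example.com/",
                cookies={"currency": "USD"},
            )

        def parse(self, response):
            self.logger.info("Visited %s", response.url)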

The following settings can be used to configure the cookie middleware:

* :setting:`COOKIES_ENABLED`

@@ -739,7 +744,7 @@ HttpProxyMiddleware

This middleware sets the HTTP proxy to use for requests, by setting the
``proxy`` meta value for :class:`~scrapy.http.Request` objects.

Like the Python standard library modules `urllib`_ and `urllib2`_, it obeys
Like the Python standard library module :mod:`urllib.request`, it obeys
the following environment variables:

* ``http_proxy``

@@ -751,9 +756,6 @@ HttpProxyMiddleware

Keep in mind this value will take precedence over ``http_proxy``/``https_proxy``
environment variables, and it will also ignore ``no_proxy`` environment variable.
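
For illustration, a minimal sketch of setting the ``proxy`` meta key on a
single request (the proxy URL is a placeholder)::

    import scrapy


    class ProxySpider(scrapy.Spider):
        name = "proxy_example"

        def start_requests(self):
            # The proxy meta key overrides the http_proxy/https_proxy
            # environment variables for this request.
            yield scrapy.Request(
                "https://www.example.com/",
                meta={"proxy": "http://proxy.example.com:8080"},
            )

        def parse(self, response):
            pass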

.. _urllib: https://docs.python.org/2/library/urllib.html
.. _urllib2: https://docs.python.org/2/library/urllib2.html

RedirectMiddleware
------------------

@@ -829,6 +831,7 @@ REDIRECT_MAX_TIMES

Default: ``20``

The maximum number of redirections that will be followed for a single request.
After this maximum, the request's response is returned as is.

MetaRefreshMiddleware
---------------------

@@ -1036,8 +1039,7 @@ Scrapy uses this parser by default.

RobotFileParser
~~~~~~~~~~~~~~~

Based on `RobotFileParser
<https://docs.python.org/3.7/library/urllib.robotparser.html>`_:
Based on :class:`~urllib.robotparser.RobotFileParser`:

* is Python's built-in robots.txt_ parser
@@ -104,6 +104,9 @@ If you get the expected response `sometimes`, but not always, the issue is
probably not your request, but the target server. The target server might be
buggy, overloaded, or :ref:`banning <bans>` some of your requests.

Note that to translate a cURL command into a Scrapy request,
you may use `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_.

.. _topics-handling-response-formats:

Handling different response formats

@@ -115,7 +118,7 @@ data from it depends on the type of response:

- If the response is HTML or XML, use :ref:`selectors
  <topics-selectors>` as usual.

- If the response is JSON, use `json.loads`_ to load the desired data from
- If the response is JSON, use :func:`json.loads` to load the desired data from
  :attr:`response.text <scrapy.http.TextResponse.text>`::

      data = json.loads(response.text)

@@ -130,8 +133,9 @@ data from it depends on the type of response:

- If the response is JavaScript, or HTML with a ``<script/>`` element
  containing the desired data, see :ref:`topics-parsing-javascript`.

- If the response is CSS, use a `regular expression`_ to extract the desired
  data from :attr:`response.text <scrapy.http.TextResponse.text>`.
- If the response is CSS, use a :doc:`regular expression <library/re>` to
  extract the desired data from
  :attr:`response.text <scrapy.http.TextResponse.text>`.

.. _topics-parsing-images:

@@ -168,8 +172,9 @@ JavaScript code:

Once you have a string with the JavaScript code, you can extract the desired
data from it:

- You might be able to use a `regular expression`_ to extract the desired
  data in JSON format, which you can then parse with `json.loads`_.
- You might be able to use a :doc:`regular expression <library/re>` to
  extract the desired data in JSON format, which you can then parse with
  :func:`json.loads`.

  For example, if the JavaScript code contains a separate line like
  ``var data = {"field": "value"};`` you can extract that data as follows:

@@ -179,6 +184,18 @@ data from it:

  >>> json.loads(json_data)
  {'field': 'value'}

- chompjs_ provides an API to parse JavaScript objects into a :class:`dict`.

  For example, if the JavaScript code contains
  ``var data = {field: "value", secondField: "second value"};``
  you can extract that data as follows:

  >>> import chompjs
  >>> javascript = response.css('script::text').get()
  >>> data = chompjs.parse_js_object(javascript)
  >>> data
  {'field': 'value', 'secondField': 'second value'}

- Otherwise, use js2xml_ to convert the JavaScript code into an XML document
  that you can parse using :ref:`selectors <topics-selectors>`.

@@ -236,14 +253,13 @@ along with `scrapy-selenium`_ for seamless integration.


.. _AJAX: https://en.wikipedia.org/wiki/Ajax_%28programming%29
.. _chompjs: https://github.com/Nykakin/chompjs
.. _CSS: https://en.wikipedia.org/wiki/Cascading_Style_Sheets
.. _curl: https://curl.haxx.se/
.. _headless browser: https://en.wikipedia.org/wiki/Headless_browser
.. _JavaScript: https://en.wikipedia.org/wiki/JavaScript
.. _js2xml: https://github.com/scrapinghub/js2xml
.. _json.loads: https://docs.python.org/3/library/json.html#json.loads
.. _pytesseract: https://github.com/madmaze/pytesseract
.. _regular expression: https://docs.python.org/3/library/re.html
.. _scrapy-selenium: https://github.com/clemfromspace/scrapy-selenium
.. _scrapy-splash: https://github.com/scrapy-plugins/scrapy-splash
.. _Selenium: https://www.selenium.dev/
@@ -7,7 +7,7 @@ Sending e-mail

.. module:: scrapy.mail
    :synopsis: Email sending facility

Although Python makes sending e-mails relatively easy via the `smtplib`_
Although Python makes sending e-mails relatively easy via the :mod:`smtplib`
library, Scrapy provides its own facility for sending e-mails which is very
easy to use and it's implemented using :doc:`Twisted non-blocking IO
<twisted:core/howto/defer-intro>`, to avoid interfering with the non-blocking

@@ -15,8 +15,6 @@ IO of the crawler. It also provides a simple API for sending attachments and
it's very easy to configure, with a few :ref:`settings
<topics-email-settings>`.

.. _smtplib: https://docs.python.org/2/library/smtplib.html

Quick example
=============
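
A minimal sketch of the ``MailSender`` API described above (the addresses are
placeholders; by default messages are handed to a local SMTP server)::

    from scrapy.mail import MailSender

    mailer = MailSender()
    mailer.send(
        to=["someone@example.com"],
        subject="Some subject",
        body="Some body",
        cc=["another@example.com"],
    )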
@@ -14,13 +14,6 @@ Built-in Exceptions reference

Here's a list of all exceptions included in Scrapy and their usage.

DropItem
--------

.. exception:: DropItem

The exception that must be raised by item pipeline stages to stop processing an
Item. For more information see :ref:`topics-item-pipeline`.

CloseSpider
-----------

@@ -47,6 +40,14 @@ DontCloseSpider

This exception can be raised in a :signal:`spider_idle` signal handler to
prevent the spider from being closed.

DropItem
--------

.. exception:: DropItem

The exception that must be raised by item pipeline stages to stop processing an
Item. For more information see :ref:`topics-item-pipeline`.

IgnoreRequest
-------------

@@ -77,3 +78,37 @@ NotSupported

This exception is raised to indicate an unsupported feature.

StopDownload
-------------

.. versionadded:: 2.2

.. exception:: StopDownload(fail=True)

Raised from a :class:`~scrapy.signals.bytes_received` signal handler to
indicate that no further bytes should be downloaded for a response.

The ``fail`` boolean parameter controls which method will handle the resulting
response:

* If ``fail=True`` (default), the request errback is called. The response object is
  available as the ``response`` attribute of the ``StopDownload`` exception,
  which is in turn stored as the ``value`` attribute of the received
  :class:`~twisted.python.failure.Failure` object. This means that in an errback
  defined as ``def errback(self, failure)``, the response can be accessed through
  ``failure.value.response``.

* If ``fail=False``, the request callback is called instead.

In both cases, the response could have its body truncated: the body contains
all bytes received up until the exception is raised, including the bytes
received in the signal handler that raises the exception. Also, the response
object is marked with ``"download_stopped"`` in its :attr:`Response.flags`
attribute.

.. note:: ``fail`` is a keyword-only parameter, i.e. raising
    ``StopDownload(False)`` or ``StopDownload(True)`` will raise
    a :class:`TypeError`.

See the documentation for the :class:`~scrapy.signals.bytes_received` signal
and the :ref:`topics-stop-response-download` topic for additional information and examples.
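
For illustration only, a hypothetical spider errback that recovers the
truncated response carried by the exception::

    def errback(self, failure):
        # With the default fail=True, the partially downloaded response is
        # available through the StopDownload exception held by the failure.
        response = failure.value.response
        self.logger.info("Stopped downloading %s after %d bytes",
                         response.url, len(response.body))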
@@ -40,6 +40,7 @@ Here you can see an :doc:`Item Pipeline <item-pipeline>` which uses multiple
Item Exporters to group scraped items to different files according to the
value of one of their fields::

    from itemadapter import ItemAdapter
    from scrapy.exporters import XmlItemExporter

    class PerYearXmlExportPipeline:

@@ -53,7 +54,8 @@ value of one of their fields::
            exporter.finish_exporting()

        def _exporter_for_item(self, item):
            year = item['year']
            adapter = ItemAdapter(item)
            year = adapter['year']
            if year not in self.year_to_exporter:
                f = open('{}.xml'.format(year), 'wb')
                exporter = XmlItemExporter(f)

@@ -167,9 +169,10 @@ BaseItemExporter
    value unchanged except for ``unicode`` values which are encoded to
    ``str`` using the encoding declared in the :attr:`encoding` attribute.

    :param field: the field being serialized. If a raw dict is being
        exported (not :class:`~.Item`) *field* value is an empty dict.
    :type field: :class:`~scrapy.item.Field` object or an empty dict
    :param field: the field being serialized. If the source :ref:`item object
        <item-types>` does not define field metadata, *field* is an empty
        :class:`dict`.
    :type field: :class:`~scrapy.item.Field` object or a :class:`dict` instance

    :param name: the name of the field being serialized
    :type name: str

@@ -192,14 +195,17 @@ BaseItemExporter

.. attribute:: fields_to_export

    A list with the name of the fields that will be exported, or None if you
    want to export all fields. Defaults to None.
    A list with the name of the fields that will be exported, or ``None`` if
    you want to export all fields. Defaults to ``None``.

    Some exporters (like :class:`CsvItemExporter`) respect the order of the
    fields defined in this attribute.

    Some exporters may require fields_to_export list in order to export the
    data properly when spiders return dicts (not :class:`~Item` instances).
    When using :ref:`item objects <item-types>` that do not expose all their
    possible fields, exporters that do not support exporting a different
    subset of fields per item will only export the fields found in the first
    item exported. Use ``fields_to_export`` to define all the fields to be
    exported.
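
For example, a short sketch that pins both the exported fields and their
column order (the field names and file name are placeholders)::

    from scrapy.exporters import CsvItemExporter

    with open("products.csv", "wb") as f:
        exporter = CsvItemExporter(f, fields_to_export=["name", "price", "stock"])
        exporter.start_exporting()
        for item in items:  # any iterable of item objects
            exporter.export_item(item)
        exporter.finish_exporting()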

.. attribute:: export_empty_fields

@@ -236,9 +242,9 @@ PythonItemExporter

XmlItemExporter
---------------

.. class:: XmlItemExporter(file, item_element='item', root_element='items', \**kwargs)
.. class:: XmlItemExporter(file, item_element='item', root_element='items', **kwargs)

    Exports Items in XML format to the specified file object.
    Exports items in XML format to the specified file object.

    :param file: the file-like object to use for exporting the data. Its ``write`` method should
        accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)

@@ -290,9 +296,9 @@ XmlItemExporter

CsvItemExporter
---------------

.. class:: CsvItemExporter(file, include_headers_line=True, join_multivalued=',', \**kwargs)
.. class:: CsvItemExporter(file, include_headers_line=True, join_multivalued=',', **kwargs)

    Exports Items in CSV format to the given file-like object. If the
    Exports items in CSV format to the given file-like object. If the
    :attr:`fields_to_export` attribute is set, it will be used to define the
    CSV columns and their order. The :attr:`export_empty_fields` attribute has
    no effect on this exporter.

@@ -311,7 +317,7 @@ CsvItemExporter

    The additional keyword arguments of this ``__init__`` method are passed to the
    :class:`BaseItemExporter` ``__init__`` method, and the leftover arguments to the
    `csv.writer`_ ``__init__`` method, so you can use any ``csv.writer`` ``__init__`` method
    :func:`csv.writer` function, so you can use any :func:`csv.writer` function
    argument to customize this exporter.

    A typical output of this exporter would be::

@@ -320,14 +326,12 @@ CsvItemExporter
        Color TV,1200
        DVD player,200

.. _csv.writer: https://docs.python.org/2/library/csv.html#csv.writer

PickleItemExporter
------------------

.. class:: PickleItemExporter(file, protocol=0, \**kwargs)
.. class:: PickleItemExporter(file, protocol=0, **kwargs)

    Exports Items in pickle format to the given file-like object.
    Exports items in pickle format to the given file-like object.

    :param file: the file-like object to use for exporting the data. Its ``write`` method should
        accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)

@@ -335,21 +339,19 @@ PickleItemExporter
    :param protocol: The pickle protocol to use.
    :type protocol: int

    For more information, refer to the `pickle module documentation`_.
    For more information, see :mod:`pickle`.

    The additional keyword arguments of this ``__init__`` method are passed to the
    :class:`BaseItemExporter` ``__init__`` method.

    Pickle isn't a human readable format, so no output examples are provided.

.. _pickle module documentation: https://docs.python.org/2/library/pickle.html

PprintItemExporter
------------------

.. class:: PprintItemExporter(file, \**kwargs)
.. class:: PprintItemExporter(file, **kwargs)

    Exports Items in pretty print format to the specified file object.
    Exports items in pretty print format to the specified file object.

    :param file: the file-like object to use for exporting the data. Its ``write`` method should
        accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)

@@ -367,13 +369,13 @@ PprintItemExporter

JsonItemExporter
----------------

.. class:: JsonItemExporter(file, \**kwargs)
.. class:: JsonItemExporter(file, **kwargs)

    Exports Items in JSON format to the specified file-like object, writing all
    Exports items in JSON format to the specified file-like object, writing all
    objects as a list of objects. The additional ``__init__`` method arguments are
    passed to the :class:`BaseItemExporter` ``__init__`` method, and the leftover
    arguments to the `JSONEncoder`_ ``__init__`` method, so you can use any
    `JSONEncoder`_ ``__init__`` method argument to customize this exporter.
    arguments to the :class:`~json.JSONEncoder` ``__init__`` method, so you can use any
    :class:`~json.JSONEncoder` ``__init__`` method argument to customize this exporter.

    :param file: the file-like object to use for exporting the data. Its ``write`` method should
        accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)

@@ -393,18 +395,16 @@ JsonItemExporter
    stream-friendly format, consider using :class:`JsonLinesItemExporter`
    instead, or splitting the output in multiple chunks.

.. _JSONEncoder: https://docs.python.org/2/library/json.html#json.JSONEncoder

JsonLinesItemExporter
---------------------

.. class:: JsonLinesItemExporter(file, \**kwargs)
.. class:: JsonLinesItemExporter(file, **kwargs)

    Exports Items in JSON format to the specified file-like object, writing one
    Exports items in JSON format to the specified file-like object, writing one
    JSON-encoded item per line. The additional ``__init__`` method arguments are passed
    to the :class:`BaseItemExporter` ``__init__`` method, and the leftover arguments to
    the `JSONEncoder`_ ``__init__`` method, so you can use any `JSONEncoder`_
    ``__init__`` method argument to customize this exporter.
    the :class:`~json.JSONEncoder` ``__init__`` method, so you can use any
    :class:`~json.JSONEncoder` ``__init__`` method argument to customize this exporter.

    :param file: the file-like object to use for exporting the data. Its ``write`` method should
        accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)

@@ -417,8 +417,6 @@ JsonLinesItemExporter
    Unlike the one produced by :class:`JsonItemExporter`, the format produced by
    this exporter is well suited for serializing large amounts of data.

.. _JSONEncoder: https://docs.python.org/2/library/json.html#json.JSONEncoder

MarshalItemExporter
-------------------
@@ -364,7 +364,7 @@ Debugger extension

.. class:: Debugger

    Invokes a `Python debugger`_ inside a running Scrapy process when a `SIGUSR2`_
    Invokes a :doc:`Python debugger <library/pdb>` inside a running Scrapy process when a `SIGUSR2`_
    signal is received. After the debugger is exited, the Scrapy process continues
    running normally.

@@ -372,5 +372,4 @@ For more info see `Debugging in Python`_.

This extension only works on POSIX-compliant platforms (i.e. not Windows).

.. _Python debugger: https://docs.python.org/2/library/pdb.html
.. _Debugging in Python: https://pythonconquerstheuniverse.wordpress.com/2009/09/10/debugging-in-python/
@@ -298,8 +298,8 @@ Example: ``FEED_EXPORT_FIELDS = ["foo", "bar", "baz"]``.

Use FEED_EXPORT_FIELDS option to define fields to export and their order.

When FEED_EXPORT_FIELDS is empty or None (default), Scrapy uses fields
defined in dicts or :class:`~.Item` subclasses a spider is yielding.
When FEED_EXPORT_FIELDS is empty or None (default), Scrapy uses the fields
defined in :ref:`item objects <topics-items>` yielded by your spider.

If an exporter requires a fixed set of fields (this is the case for
:ref:`CSV <topics-feed-format-csv>` export format) and FEED_EXPORT_FIELDS
@@ -27,15 +27,19 @@ Each item pipeline component is a Python class that must implement the following

.. method:: process_item(self, item, spider)

    This method is called for every item pipeline component. :meth:`process_item`
    must either: return a dict with data, return an :class:`~scrapy.item.Item`
    (or any descendant class) object, return a
    :class:`~twisted.internet.defer.Deferred` or raise
    :exc:`~scrapy.exceptions.DropItem` exception. Dropped items are no longer
    processed by further pipeline components.
    This method is called for every item pipeline component.

    :param item: the item scraped
    :type item: :class:`~scrapy.item.Item` object or a dict
    `item` is an :ref:`item object <item-types>`, see
    :ref:`supporting-item-types`.

    :meth:`process_item` must either: return an :ref:`item object <item-types>`,
    return a :class:`~twisted.internet.defer.Deferred` or raise a
    :exc:`~scrapy.exceptions.DropItem` exception.

    Dropped items are no longer processed by further pipeline components.

    :param item: the scraped item
    :type item: :ref:`item object <item-types>`

    :param spider: the spider which scraped the item
    :type spider: :class:`~scrapy.spiders.Spider` object

@@ -79,16 +83,17 @@ Let's take a look at the following hypothetical pipeline that adjusts the
(``price_excludes_vat`` attribute), and drops those items which don't
contain a price::

    from itemadapter import ItemAdapter
    from scrapy.exceptions import DropItem

    class PricePipeline:

        vat_factor = 1.15

        def process_item(self, item, spider):
            if item.get('price'):
                if item.get('price_excludes_vat'):
                    item['price'] = item['price'] * self.vat_factor
            adapter = ItemAdapter(item)
            if adapter.get('price'):
                if adapter.get('price_excludes_vat'):
                    adapter['price'] = adapter['price'] * self.vat_factor
                return item
            else:
                raise DropItem("Missing price in %s" % item)

@@ -103,6 +108,8 @@ format::

    import json

    from itemadapter import ItemAdapter

    class JsonWriterPipeline:

        def open_spider(self, spider):

@@ -112,7 +119,7 @@ format::
            self.file.close()

        def process_item(self, item, spider):
            line = json.dumps(dict(item)) + "\n"
            line = json.dumps(ItemAdapter(item).asdict()) + "\n"
            self.file.write(line)
            return item

@@ -131,6 +138,7 @@ The main point of this example is to show how to use :meth:`from_crawler`
method and how to clean up the resources properly.::

    import pymongo
    from itemadapter import ItemAdapter

    class MongoPipeline:

@@ -155,7 +163,7 @@ method and how to clean up the resources properly.::
            self.client.close()

        def process_item(self, item, spider):
            self.db[self.collection_name].insert_one(dict(item))
            self.db[self.collection_name].insert_one(ItemAdapter(item).asdict())
            return item

.. _MongoDB: https://www.mongodb.com/

@@ -167,18 +175,21 @@ method and how to clean up the resources properly.::

Take screenshot of item
-----------------------

This example demonstrates how to return a
:class:`~twisted.internet.defer.Deferred` from the :meth:`process_item` method.
It uses Splash_ to render screenshot of item url. Pipeline
makes request to locally running instance of Splash_. After request is downloaded,
it saves the screenshot to a file and adds filename to the item.
This example demonstrates how to use :doc:`coroutine syntax <coroutines>` in
the :meth:`process_item` method.

This item pipeline makes a request to a locally-running instance of Splash_ to
render a screenshot of the item URL. After the request response is downloaded,
the item pipeline saves the screenshot to a file and adds the filename to the
item.

::

    import scrapy
    import hashlib
    from urllib.parse import quote

    import scrapy
    from itemadapter import ItemAdapter

    class ScreenshotPipeline:
        """Pipeline that uses Splash to render screenshot of

@@ -187,7 +198,8 @@ it saves the screenshot to a file and adds filename to the item.

        SPLASH_URL = "http://localhost:8050/render.png?url={}"

        async def process_item(self, item, spider):
            encoded_item_url = quote(item["url"])
            adapter = ItemAdapter(item)
            encoded_item_url = quote(adapter["url"])
            screenshot_url = self.SPLASH_URL.format(encoded_item_url)
            request = scrapy.Request(screenshot_url)
            response = await spider.crawler.engine.download(request, spider)

@@ -197,14 +209,14 @@ it saves the screenshot to a file and adds filename to the item.
                return item

            # Save screenshot to file, filename will be hash of url.
            url = item["url"]
            url = adapter["url"]
            url_hash = hashlib.md5(url.encode("utf8")).hexdigest()
            filename = "{}.png".format(url_hash)
            with open(filename, "wb") as f:
                f.write(response.body)

            # Store filename in item.
            item["screenshot_filename"] = filename
            adapter["screenshot_filename"] = filename
            return item

.. _Splash: https://splash.readthedocs.io/en/stable/
@@ -217,6 +229,7 @@ already processed. Let's say that our items have a unique id, but our spider
returns multiple items with the same id::

    from itemadapter import ItemAdapter
    from scrapy.exceptions import DropItem

    class DuplicatesPipeline:

@@ -225,10 +238,11 @@ returns multiple items with the same id::
            self.ids_seen = set()

        def process_item(self, item, spider):
            if item['id'] in self.ids_seen:
                raise DropItem("Duplicate item found: %s" % item)
            adapter = ItemAdapter(item)
            if adapter['id'] in self.ids_seen:
                raise DropItem("Duplicate item found: %r" % item)
            else:
                self.ids_seen.add(item['id'])
                self.ids_seen.add(adapter['id'])
            return item
@@ -8,31 +8,155 @@ Items
    :synopsis: Item and Field classes

The main goal in scraping is to extract structured data from unstructured
sources, typically, web pages. Scrapy spiders can return the extracted data
as Python dicts. While convenient and familiar, Python dicts lack structure:
it is easy to make a typo in a field name or return inconsistent data,
especially in a larger project with many spiders.
sources, typically, web pages. :ref:`Spiders <topics-spiders>` may return the
extracted data as `items`, Python objects that define key-value pairs.

To define common output data format Scrapy provides the :class:`Item` class.
:class:`Item` objects are simple containers used to collect the scraped data.
They provide a `dictionary-like`_ API with a convenient syntax for declaring
their available fields.
Scrapy supports :ref:`multiple types of items <item-types>`. When you create an
item, you may use whichever type of item you want. When you write code that
receives an item, your code should :ref:`work for any item type
<supporting-item-types>`.
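
For illustration, a small sketch of item-type-agnostic code written with the
``itemadapter`` library (the ``price`` field is only an example and is assumed
to be declared on the item)::

    from itemadapter import ItemAdapter, is_item

    def normalize_price(item):
        # Works the same for dicts, Item subclasses, dataclass and attrs objects.
        if is_item(item):
            adapter = ItemAdapter(item)
            if adapter.get("price") is not None:
                adapter["price"] = float(adapter["price"])
        return item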
|
||||
|
||||
Various Scrapy components use extra information provided by Items:
|
||||
exporters look at declared fields to figure out columns to export,
|
||||
serialization can be customized using Item fields metadata, :mod:`trackref`
|
||||
tracks Item instances to help find memory leaks
|
||||
(see :ref:`topics-leaks-trackrefs`), etc.
|
||||
.. _item-types:
|
||||
|
||||
.. _dictionary-like: https://docs.python.org/2/library/stdtypes.html#dict
|
||||
Item Types
|
||||
==========
|
||||
|
||||
Scrapy supports the following types of items, via the `itemadapter`_ library:
|
||||
:ref:`dictionaries <dict-items>`, :ref:`Item objects <item-objects>`,
|
||||
:ref:`dataclass objects <dataclass-items>`, and :ref:`attrs objects <attrs-items>`.
|
||||
|
||||
.. _itemadapter: https://github.com/scrapy/itemadapter
|
||||
|
||||
.. _dict-items:
|
||||
|
||||
Dictionaries
|
||||
------------
|
||||
|
||||
As an item type, :class:`dict` is convenient and familiar.
|
||||
|
||||
.. _item-objects:
|
||||
|
||||
Item objects
|
||||
------------
|
||||
|
||||
:class:`Item` provides a :class:`dict`-like API plus additional features that
|
||||
make it the most feature-complete item type:
|
||||
|
||||
.. class:: Item([arg])
|
||||
|
||||
:class:`Item` objects replicate the standard :class:`dict` API, including
|
||||
its ``__init__`` method.
|
||||
|
||||
:class:`Item` allows defining field names, so that:
|
||||
|
||||
- :class:`KeyError` is raised when using undefined field names (i.e.
|
||||
prevents typos going unnoticed)
|
||||
|
||||
- :ref:`Item exporters <topics-exporters>` can export all fields by
|
||||
default even if the first scraped object does not have values for all
|
||||
of them
|
||||
|
||||
:class:`Item` also allows defining field metadata, which can be used to
|
||||
:ref:`customize serialization <topics-exporters-field-serialization>`.
|
||||
|
||||
:mod:`trackref` tracks :class:`Item` objects to help find memory leaks
|
||||
(see :ref:`topics-leaks-trackrefs`).
|
||||
|
||||
:class:`Item` objects also provide the following additional API members:
|
||||
|
||||
.. automethod:: copy
|
||||
|
||||
.. automethod:: deepcopy
|
||||
|
||||
.. attribute:: fields
|
||||
|
||||
A dictionary containing *all declared fields* for this Item, not only
|
||||
those populated. The keys are the field names and the values are the
|
||||
:class:`Field` objects used in the :ref:`Item declaration
|
||||
<topics-items-declaring>`.
|
||||
|
||||
Example::
|
||||
|
||||
from scrapy.item import Item, Field
|
||||
|
||||
class CustomItem(Item):
|
||||
one_field = Field()
|
||||
another_field = Field()
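
A quick, illustrative sketch of the ``KeyError`` behavior described above,
assuming the ``CustomItem`` class just declared::

    >>> item = CustomItem(one_field='value')
    >>> item['one_field']
    'value'
    >>> item['undefined_field'] = 'value'
    Traceback (most recent call last):
        ...
    KeyError: 'CustomItem does not support field: undefined_field'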
|
||||
|
||||
.. _dataclass-items:
|
||||
|
||||
Dataclass objects
|
||||
-----------------
|
||||
|
||||
.. versionadded:: 2.2
|
||||
|
||||
:func:`~dataclasses.dataclass` allows defining item classes with field names,
|
||||
so that :ref:`item exporters <topics-exporters>` can export all fields by
|
||||
default even if the first scraped object does not have values for all of them.
|
||||
|
||||
Additionally, ``dataclass`` items also allow you to:
|
||||
|
||||
* define the type and default value of each defined field.
|
||||
|
||||
* define custom field metadata through :func:`dataclasses.field`, which can be used to
|
||||
:ref:`customize serialization <topics-exporters-field-serialization>`.
|
||||
|
||||
They work natively in Python 3.7 or later, or using the `dataclasses
|
||||
backport`_ in Python 3.6.
|
||||
|
||||
.. _dataclasses backport: https://pypi.org/project/dataclasses/
|
||||
|
||||
Example::
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class CustomItem:
|
||||
one_field: str
|
||||
another_field: int
|
||||
|
||||
.. note:: Field types are not enforced at run time.
|
||||
|
||||
.. _attrs-items:
|
||||
|
||||
attr.s objects
|
||||
--------------
|
||||
|
||||
.. versionadded:: 2.2
|
||||
|
||||
:func:`attr.s` allows defining item classes with field names,
|
||||
so that :ref:`item exporters <topics-exporters>` can export all fields by
|
||||
default even if the first scraped object does not have values for all of them.
|
||||
|
||||
Additionally, ``attr.s`` items also allow you to:
|
||||
|
||||
* define the type and default value of each defined field.
|
||||
|
||||
* define custom field :ref:`metadata <attrs:metadata>`, which can be used to
|
||||
:ref:`customize serialization <topics-exporters-field-serialization>`.
|
||||
|
||||
In order to use this type, the :doc:`attrs package <attrs:index>` needs to be installed.
|
||||
|
||||
Example::
|
||||
|
||||
import attr
|
||||
|
||||
@attr.s
|
||||
class CustomItem:
|
||||
one_field = attr.ib()
|
||||
another_field = attr.ib()
|
||||
|
||||
|
||||
Working with Item objects
|
||||
=========================
|
||||
|
||||
.. _topics-items-declaring:
|
||||
|
||||
Declaring Items
|
||||
===============
|
||||
Declaring Item subclasses
|
||||
-------------------------
|
||||
|
||||
Items are declared using a simple class definition syntax and :class:`Field`
|
||||
objects. Here is an example::
|
||||
Item subclasses are declared using a simple class definition syntax and
|
||||
:class:`Field` objects. Here is an example::
|
||||
|
||||
import scrapy
|
||||
|
||||
@ -50,10 +174,11 @@ objects. Here is an example::
|
||||
.. _Django: https://www.djangoproject.com/
|
||||
.. _Django Models: https://docs.djangoproject.com/en/dev/topics/db/models/
|
||||
|
||||
|
||||
.. _topics-items-fields:
|
||||
|
||||
Item Fields
|
||||
===========
|
||||
Declaring fields
|
||||
----------------
|
||||
|
||||
:class:`Field` objects are used to specify metadata for each field. For
|
||||
example, the serializer function for the ``last_updated`` field illustrated in
|
||||
@ -74,15 +199,31 @@ It's important to note that the :class:`Field` objects used to declare the item
|
||||
do not stay assigned as class attributes. Instead, they can be accessed through
|
||||
the :attr:`Item.fields` attribute.
|
||||
|
||||
Working with Items
|
||||
==================
|
||||
.. class:: Field([arg])
|
||||
|
||||
The :class:`Field` class is just an alias to the built-in :class:`dict` class and
|
||||
doesn't provide any extra functionality or attributes. In other words,
|
||||
:class:`Field` objects are plain-old Python dicts. A separate class is used
|
||||
to support the :ref:`item declaration syntax <topics-items-declaring>`
|
||||
based on class attributes.
|
||||
|
||||
.. note:: Field metadata can also be declared for ``dataclass`` and ``attrs``
|
||||
items. Please refer to the documentation for `dataclasses.field`_ and
|
||||
`attr.ib`_ for additional information.
|
||||
|
||||
.. _dataclasses.field: https://docs.python.org/3/library/dataclasses.html#dataclasses.field
|
||||
.. _attr.ib: https://www.attrs.org/en/stable/api.html#attr.ib
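
For instance, a minimal sketch of attaching serializer metadata to a
``dataclass`` field; the ``serializer`` key is assumed here to be picked up
the same way as :class:`Field` metadata by :ref:`item exporters
<topics-exporters>`::

    from dataclasses import dataclass, field
    from typing import Optional

    @dataclass
    class Product:
        name: Optional[str] = field(default=None)
        # metadata plays the role of the Field() keyword arguments
        price: Optional[float] = field(
            default=None, metadata={'serializer': lambda value: f'$ {value}'}
        )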
|
||||
|
||||
|
||||
Working with Item objects
|
||||
-------------------------
|
||||
|
||||
Here are some examples of common tasks performed with items, using the
|
||||
``Product`` item :ref:`declared above <topics-items-declaring>`. You will
|
||||
notice the API is very similar to the `dict API`_.
|
||||
notice the API is very similar to the :class:`dict` API.
|
||||
|
||||
Creating items
|
||||
--------------
|
||||
''''''''''''''
|
||||
|
||||
>>> product = Product(name='Desktop PC', price=1000)
|
||||
>>> print(product)
|
||||
@ -90,7 +231,7 @@ Product(name='Desktop PC', price=1000)
|
||||
|
||||
|
||||
Getting field values
|
||||
--------------------
|
||||
''''''''''''''''''''
|
||||
|
||||
>>> product['name']
|
||||
Desktop PC
|
||||
@ -130,7 +271,7 @@ False
|
||||
|
||||
|
||||
Setting field values
|
||||
--------------------
|
||||
''''''''''''''''''''
|
||||
|
||||
>>> product['last_updated'] = 'today'
|
||||
>>> product['last_updated']
|
||||
@ -143,9 +284,9 @@ KeyError: 'Product does not support field: lala'
|
||||
|
||||
|
||||
Accessing all populated values
|
||||
------------------------------
|
||||
''''''''''''''''''''''''''''''
|
||||
|
||||
To access all populated values, just use the typical `dict API`_:
|
||||
To access all populated values, just use the typical :class:`dict` API:
|
||||
|
||||
>>> product.keys()
|
||||
['price', 'name']
|
||||
@ -157,16 +298,14 @@ To access all populated values, just use the typical `dict API`_:
|
||||
.. _copying-items:
|
||||
|
||||
Copying items
|
||||
-------------
|
||||
'''''''''''''
|
||||
|
||||
To copy an item, you must first decide whether you want a shallow copy or a
|
||||
deep copy.
|
||||
|
||||
If your item contains mutable_ values like lists or dictionaries, a shallow
|
||||
copy will keep references to the same mutable values across all different
|
||||
copies.
|
||||
|
||||
.. _mutable: https://docs.python.org/3/glossary.html#term-mutable
|
||||
If your item contains :term:`mutable` values like lists or dictionaries,
|
||||
a shallow copy will keep references to the same mutable values across all
|
||||
different copies.
|
||||
|
||||
For example, if you have an item with a list of tags, and you create a shallow
|
||||
copy of that item, both the original item and the copy have the same list of
|
||||
@ -175,9 +314,7 @@ other item as well.
|
||||
|
||||
If that is not the desired behavior, use a deep copy instead.
|
||||
|
||||
See the `documentation of the copy module`_ for more information.
|
||||
|
||||
.. _documentation of the copy module: https://docs.python.org/3/library/copy.html
|
||||
See :mod:`copy` for more information.
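
A quick interactive sketch of the difference, assuming a hypothetical
``TaggedItem`` class that declares a ``tags`` field::

    >>> item = TaggedItem(tags=['electronics'])
    >>> shallow_copy = item.copy()
    >>> shallow_copy['tags'].append('sale')
    >>> item['tags']  # the original shares the same list
    ['electronics', 'sale']
    >>> deep_copy = item.deepcopy()
    >>> deep_copy['tags'].append('refurbished')
    >>> item['tags']  # unaffected by changes to the deep copy
    ['electronics', 'sale']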
|
||||
|
||||
To create a shallow copy of an item, you can either call
|
||||
:meth:`~scrapy.item.Item.copy` on an existing item
|
||||
@ -189,7 +326,7 @@ To create a deep copy, call :meth:`~scrapy.item.Item.deepcopy` instead
|
||||
|
||||
|
||||
Other common tasks
|
||||
------------------
|
||||
''''''''''''''''''
|
||||
|
||||
Creating dicts from items:
|
||||
|
||||
@ -207,8 +344,8 @@ Traceback (most recent call last):
|
||||
KeyError: 'Product does not support field: lala'
|
||||
|
||||
|
||||
Extending Items
|
||||
===============
|
||||
Extending Item subclasses
|
||||
-------------------------
|
||||
|
||||
You can extend Items (to add more fields or to change some metadata for some
|
||||
fields) by declaring a subclass of your original Item.
|
||||
@ -228,46 +365,25 @@ appending more values, or changing existing values, like this::
|
||||
That adds (or replaces) the ``serializer`` metadata key for the ``name`` field,
|
||||
keeping all the previously existing metadata values.
|
||||
|
||||
Item objects
|
||||
============
|
||||
|
||||
.. class:: Item([arg])
|
||||
.. _supporting-item-types:
|
||||
|
||||
Return a new Item optionally initialized from the given argument.
|
||||
Supporting All Item Types
|
||||
=========================
|
||||
|
||||
Items replicate the standard `dict API`_, including its ``__init__`` method, and
|
||||
also provide the following additional API members:
|
||||
In code that receives an item, such as methods of :ref:`item pipelines
|
||||
<topics-item-pipeline>` or :ref:`spider middlewares
|
||||
<topics-spider-middleware>`, it is a good practice to use the
|
||||
:class:`~itemadapter.ItemAdapter` class and the
|
||||
:func:`~itemadapter.is_item` function to write code that works for
|
||||
any :ref:`supported item type <item-types>`:
|
||||
|
||||
.. automethod:: copy
|
||||
.. autoclass:: itemadapter.ItemAdapter
|
||||
|
||||
.. automethod:: deepcopy
|
||||
|
||||
.. attribute:: fields
|
||||
|
||||
A dictionary containing *all declared fields* for this Item, not only
|
||||
those populated. The keys are the field names and the values are the
|
||||
:class:`Field` objects used in the :ref:`Item declaration
|
||||
<topics-items-declaring>`.
|
||||
|
||||
.. _dict API: https://docs.python.org/2/library/stdtypes.html#dict
|
||||
|
||||
Field objects
|
||||
=============
|
||||
|
||||
.. class:: Field([arg])
|
||||
|
||||
The :class:`Field` class is just an alias to the built-in `dict`_ class and
|
||||
doesn't provide any extra functionality or attributes. In other words,
|
||||
:class:`Field` objects are plain-old Python dicts. A separate class is used
|
||||
to support the :ref:`item declaration syntax <topics-items-declaring>`
|
||||
based on class attributes.
|
||||
|
||||
.. _dict: https://docs.python.org/2/library/stdtypes.html#dict
|
||||
.. autofunction:: itemadapter.is_item
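
For example, a minimal, hypothetical pipeline sketch (the ``price`` field
name is an assumption) that works for any supported item type::

    from itemadapter import ItemAdapter, is_item

    class PriceNormalizationPipeline:

        def process_item(self, item, spider):
            # is_item() returns True for any supported item type
            if is_item(item):
                adapter = ItemAdapter(item)
                # ItemAdapter exposes a common dict-like interface
                if adapter.get('price') is not None:
                    adapter['price'] = float(adapter['price'])
            return item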
|
||||
|
||||
|
||||
Other classes related to Item
|
||||
=============================
|
||||
|
||||
.. autoclass:: BaseItem
|
||||
Other classes related to items
|
||||
==============================
|
||||
|
||||
.. autoclass:: ItemMeta
|
||||
|
@ -4,7 +4,7 @@
|
||||
Debugging memory leaks
|
||||
======================
|
||||
|
||||
In Scrapy, objects such as Requests, Responses and Items have a finite
|
||||
In Scrapy, objects such as requests, responses and items have a finite
|
||||
lifetime: they are created, used for a while, and finally destroyed.
|
||||
|
||||
From all those objects, the Request is probably the one with the longest
|
||||
@ -61,8 +61,8 @@ Debugging memory leaks with ``trackref``
|
||||
========================================
|
||||
|
||||
:mod:`trackref` is a module provided by Scrapy to debug the most common cases of
|
||||
memory leaks. It basically tracks the references to all live Requests,
|
||||
Responses, Item and Selector objects.
|
||||
memory leaks. It basically tracks the references to all live Request,
|
||||
Response, Item, Spider and Selector objects.
|
||||
|
||||
You can enter the telnet console and inspect how many objects (of the classes
|
||||
mentioned above) are currently alive using the ``prefs()`` function which is an
|
||||
@ -200,11 +200,10 @@ Debugging memory leaks with muppy
|
||||
|
||||
``trackref`` provides a very convenient mechanism for tracking down memory
|
||||
leaks, but it only keeps track of the objects that are more likely to cause
|
||||
memory leaks (Requests, Responses, Items, and Selectors). However, there are
|
||||
other cases where the memory leaks could come from other (more or less obscure)
|
||||
objects. If this is your case, and you can't find your leaks using ``trackref``,
|
||||
you still have another resource: the muppy library.
|
||||
|
||||
memory leaks. However, there are other cases where the memory leaks could come
|
||||
from other (more or less obscure) objects. If this is your case, and you can't
|
||||
find your leaks using ``trackref``, you still have another resource: the muppy
|
||||
library.
|
||||
|
||||
You can use muppy from `Pympler`_.
|
||||
|
||||
|
@ -7,13 +7,12 @@ Item Loaders
|
||||
.. module:: scrapy.loader
|
||||
:synopsis: Item Loader class
|
||||
|
||||
Item Loaders provide a convenient mechanism for populating scraped :ref:`Items
|
||||
<topics-items>`. Even though Items can be populated using their own
|
||||
dictionary-like API, Item Loaders provide a much more convenient API for
|
||||
populating them from a scraping process, by automating some common tasks like
|
||||
parsing the raw extracted data before assigning it.
|
||||
Item Loaders provide a convenient mechanism for populating scraped :ref:`items
|
||||
<topics-items>`. Even though items can be populated directly, Item Loaders provide a
|
||||
much more convenient API for populating them from a scraping process, by automating
|
||||
some common tasks like parsing the raw extracted data before assigning it.
|
||||
|
||||
In other words, :ref:`Items <topics-items>` provide the *container* of
|
||||
In other words, :ref:`items <topics-items>` provide the *container* of
|
||||
scraped data, while Item Loaders provide the mechanism for *populating* that
|
||||
container.
|
||||
|
||||
@ -25,10 +24,10 @@ Using Item Loaders to populate items
|
||||
====================================
|
||||
|
||||
To use an Item Loader, you must first instantiate it. You can either
|
||||
instantiate it with a dict-like object (e.g. Item or dict) or without one, in
|
||||
which case an Item is automatically instantiated in the Item Loader ``__init__`` method
|
||||
using the Item class specified in the :attr:`ItemLoader.default_item_class`
|
||||
attribute.
|
||||
instantiate it with an :ref:`item object <topics-items>` or without one, in which
|
||||
case an :ref:`item object <topics-items>` is automatically created in the
|
||||
Item Loader ``__init__`` method using the :ref:`item <topics-items>` class
|
||||
specified in the :attr:`ItemLoader.default_item_class` attribute.
|
||||
|
||||
Then, you start collecting values into the Item Loader, typically using
|
||||
:ref:`Selectors <topics-selectors>`. You can add more than one value to
|
||||
@ -77,6 +76,31 @@ called which actually returns the item populated with the data
|
||||
previously extracted and collected with the :meth:`~ItemLoader.add_xpath`,
|
||||
:meth:`~ItemLoader.add_css`, and :meth:`~ItemLoader.add_value` calls.
|
||||
|
||||
|
||||
.. _topics-loaders-dataclass:
|
||||
|
||||
Working with dataclass items
|
||||
============================
|
||||
|
||||
By default, :ref:`dataclass items <dataclass-items>` require all fields to be
|
||||
passed when created. This could be an issue when using dataclass items with
|
||||
item loaders: unless a pre-populated item is passed to the loader, fields
|
||||
will be populated incrementally using the loader's :meth:`~ItemLoader.add_xpath`,
|
||||
:meth:`~ItemLoader.add_css` and :meth:`~ItemLoader.add_value` methods.
|
||||
|
||||
One approach to overcome this is to define items using the
|
||||
:func:`~dataclasses.field` function, with a ``default`` argument::
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
@dataclass
|
||||
class InventoryItem:
|
||||
name: Optional[str] = field(default=None)
|
||||
price: Optional[float] = field(default=None)
|
||||
stock: Optional[int] = field(default=None)
|
||||
|
||||
|
||||
.. _topics-loaders-processors:
|
||||
|
||||
Input and Output processors
|
||||
@ -88,7 +112,7 @@ received (through the :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`
|
||||
:meth:`~ItemLoader.add_value` methods) and the result of the input processor is
|
||||
collected and kept inside the ItemLoader. After collecting all data, the
|
||||
:meth:`ItemLoader.load_item` method is called to populate and get the populated
|
||||
:class:`~scrapy.item.Item` object. That's when the output processor is
|
||||
:ref:`item object <topics-items>`. That's when the output processor is
|
||||
called with the data previously collected (and processed using the input
|
||||
processor). The result of the output processor is the final value that gets
|
||||
assigned to the item.
|
||||
@ -153,12 +177,10 @@ Last, but not least, Scrapy comes with some :ref:`commonly used processors
|
||||
<topics-loaders-available-processors>` built-in for convenience.
|
||||
|
||||
|
||||
|
||||
Declaring Item Loaders
|
||||
======================
|
||||
|
||||
Item Loaders are declared like Items, by using a class definition syntax. Here
|
||||
is an example::
|
||||
Item Loaders are declared using a class definition syntax. Here is an example::
|
||||
|
||||
from scrapy.loader import ItemLoader
|
||||
from scrapy.loader.processors import TakeFirst, MapCompose, Join
|
||||
@ -273,11 +295,11 @@ There are several ways to modify Item Loader context values:
|
||||
ItemLoader objects
|
||||
==================
|
||||
|
||||
.. class:: ItemLoader([item, selector, response], \**kwargs)
|
||||
.. class:: ItemLoader([item, selector, response], **kwargs)
|
||||
|
||||
Return a new Item Loader for populating the given Item. If no item is
|
||||
given, one is instantiated automatically using the class in
|
||||
:attr:`default_item_class`.
|
||||
Return a new Item Loader for populating the given :ref:`item object
|
||||
<topics-items>`. If no item object is given, one is instantiated
|
||||
automatically using the class in :attr:`default_item_class`.
|
||||
|
||||
When instantiated with a ``selector`` or a ``response`` parameters
|
||||
the :class:`ItemLoader` class provides convenient mechanisms for extracting
|
||||
@ -286,7 +308,7 @@ ItemLoader objects
|
||||
:param item: The item instance to populate using subsequent calls to
|
||||
:meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
|
||||
or :meth:`~ItemLoader.add_value`.
|
||||
:type item: :class:`~scrapy.item.Item` object
|
||||
:type item: :ref:`item object <topics-items>`
|
||||
|
||||
:param selector: The selector to extract data from, when using the
|
||||
:meth:`add_xpath` (resp. :meth:`add_css`) or :meth:`replace_xpath`
|
||||
@ -303,7 +325,7 @@ ItemLoader objects
|
||||
|
||||
:class:`ItemLoader` instances have the following methods:
|
||||
|
||||
.. method:: get_value(value, \*processors, \**kwargs)
|
||||
.. method:: get_value(value, *processors, **kwargs)
|
||||
|
||||
Process the given ``value`` by the given ``processors`` and keyword
|
||||
arguments.
|
||||
@ -321,7 +343,7 @@ ItemLoader objects
|
||||
>>> loader.get_value('name: foo', TakeFirst(), str.upper, re='name: (.+)')
|
||||
'FOO'
|
||||
|
||||
.. method:: add_value(field_name, value, \*processors, \**kwargs)
|
||||
.. method:: add_value(field_name, value, *processors, **kwargs)
|
||||
|
||||
Process and then add the given ``value`` for the given field.
|
||||
|
||||
@ -343,11 +365,11 @@ ItemLoader objects
|
||||
loader.add_value('name', u'name: foo', TakeFirst(), re='name: (.+)')
|
||||
loader.add_value(None, {'name': u'foo', 'sex': u'male'})
|
||||
|
||||
.. method:: replace_value(field_name, value, \*processors, \**kwargs)
|
||||
.. method:: replace_value(field_name, value, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`add_value` but replaces the collected data with the
|
||||
new value instead of adding it.
|
||||
.. method:: get_xpath(xpath, \*processors, \**kwargs)
|
||||
.. method:: get_xpath(xpath, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
|
||||
value, which is used to extract a list of unicode strings from the
|
||||
@ -367,7 +389,7 @@ ItemLoader objects
|
||||
# HTML snippet: <p id="price">the price is $1200</p>
|
||||
loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')
|
||||
|
||||
.. method:: add_xpath(field_name, xpath, \*processors, \**kwargs)
|
||||
.. method:: add_xpath(field_name, xpath, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
|
||||
value, which is used to extract a list of unicode strings from the
|
||||
@ -385,12 +407,12 @@ ItemLoader objects
|
||||
# HTML snippet: <p id="price">the price is $1200</p>
|
||||
loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
|
||||
|
||||
.. method:: replace_xpath(field_name, xpath, \*processors, \**kwargs)
|
||||
.. method:: replace_xpath(field_name, xpath, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`add_xpath` but replaces collected data instead of
|
||||
adding it.
|
||||
|
||||
.. method:: get_css(css, \*processors, \**kwargs)
|
||||
.. method:: get_css(css, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`ItemLoader.get_value` but receives a CSS selector
|
||||
instead of a value, which is used to extract a list of unicode strings
|
||||
@ -410,7 +432,7 @@ ItemLoader objects
|
||||
# HTML snippet: <p id="price">the price is $1200</p>
|
||||
loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
|
||||
|
||||
.. method:: add_css(field_name, css, \*processors, \**kwargs)
|
||||
.. method:: add_css(field_name, css, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`ItemLoader.add_value` but receives a CSS selector
|
||||
instead of a value, which is used to extract a list of unicode strings
|
||||
@ -428,7 +450,7 @@ ItemLoader objects
|
||||
# HTML snippet: <p id="price">the price is $1200</p>
|
||||
loader.add_css('price', 'p#price', re='the price is (.*)')
|
||||
|
||||
.. method:: replace_css(field_name, css, \*processors, \**kwargs)
|
||||
.. method:: replace_css(field_name, css, *processors, **kwargs)
|
||||
|
||||
Similar to :meth:`add_css` but replaces collected data instead of
|
||||
adding it.
|
||||
@ -444,17 +466,19 @@ ItemLoader objects
|
||||
|
||||
Create a nested loader with an xpath selector.
|
||||
The supplied selector is applied relative to selector associated
|
||||
with this :class:`ItemLoader`. The nested loader shares the :class:`Item`
|
||||
with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
|
||||
:meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
|
||||
with this :class:`ItemLoader`. The nested loader shares the :ref:`item
|
||||
object <topics-items>` with the parent :class:`ItemLoader` so calls to
|
||||
:meth:`add_xpath`, :meth:`add_value`, :meth:`replace_value`, etc. will
|
||||
behave as expected.
|
||||
|
||||
.. method:: nested_css(css)
|
||||
|
||||
Create a nested loader with a css selector.
|
||||
The supplied selector is applied relative to selector associated
|
||||
with this :class:`ItemLoader`. The nested loader shares the :class:`Item`
|
||||
with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
|
||||
:meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
|
||||
with this :class:`ItemLoader`. The nested loader shares the :ref:`item
|
||||
object <topics-items>` with the parent :class:`ItemLoader` so calls to
|
||||
:meth:`add_xpath`, :meth:`add_value`, :meth:`replace_value`, etc. will
|
||||
behave as expected.
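
A minimal sketch of how a nested loader might be used, assuming a
hypothetical ``PageItem`` class that declares ``title``, ``social`` and
``email`` fields::

    loader = ItemLoader(item=PageItem(), response=response)
    loader.add_xpath('title', '//title/text()')

    footer_loader = loader.nested_xpath('//footer')
    # selectors below are relative to the selected footer element
    footer_loader.add_xpath('social', 'a[@class="social"]/@href')
    footer_loader.add_xpath('email', 'a[@class="email"]/@href')

    # both loaders write into the same item
    item = loader.load_item()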
|
||||
|
||||
.. method:: get_collected_values(field_name)
|
||||
|
||||
@ -477,7 +501,7 @@ ItemLoader objects
|
||||
|
||||
.. attribute:: item
|
||||
|
||||
The :class:`~scrapy.item.Item` object being parsed by this Item Loader.
|
||||
The :ref:`item object <topics-items>` being parsed by this Item Loader.
|
||||
This is mostly used as a property so when attempting to override this
|
||||
value, you may want to check out :attr:`default_item_class` first.
|
||||
|
||||
@ -488,8 +512,8 @@ ItemLoader objects
|
||||
|
||||
.. attribute:: default_item_class
|
||||
|
||||
An Item class (or factory), used to instantiate items when not given in
|
||||
the ``__init__`` method.
|
||||
An :ref:`item object <topics-items>` class or factory, used to
|
||||
instantiate items when not given in the ``__init__`` method.
|
||||
|
||||
.. attribute:: default_input_processor
|
||||
|
||||
@ -678,7 +702,7 @@ Here is a list of all built-in processors:
|
||||
>>> proc(['one', 'two', 'three'])
|
||||
'one<br>two<br>three'
|
||||
|
||||
.. class:: Compose(\*functions, \**default_loader_context)
|
||||
.. class:: Compose(*functions, **default_loader_context)
|
||||
|
||||
A processor which is constructed from the composition of the given
|
||||
functions. This means that each input value of this processor is passed to
|
||||
@ -706,7 +730,7 @@ Here is a list of all built-in processors:
|
||||
active Loader context accessible through the :meth:`ItemLoader.context`
|
||||
attribute.
|
||||
|
||||
.. class:: MapCompose(\*functions, \**default_loader_context)
|
||||
.. class:: MapCompose(*functions, **default_loader_context)
|
||||
|
||||
A processor which is constructed from the composition of the given
|
||||
functions, similar to the :class:`Compose` processor. The difference with
|
||||
|
@ -9,8 +9,7 @@ Logging
|
||||
explicit calls to the Python standard logging. Keep reading to learn more
|
||||
about the new logging system.
|
||||
|
||||
Scrapy uses `Python's builtin logging system
|
||||
<https://docs.python.org/3/library/logging.html>`_ for event logging. We'll
|
||||
Scrapy uses :mod:`logging` for event logging. We'll
|
||||
provide some simple examples to get you started, but for more advanced
|
||||
use-cases it's strongly suggested to read thoroughly its documentation.
|
||||
|
||||
@ -83,10 +82,10 @@ path::
|
||||
|
||||
.. seealso::
|
||||
|
||||
Module logging, `HowTo <https://docs.python.org/2/howto/logging.html>`_
|
||||
Module logging, :doc:`HowTo <howto/logging>`
|
||||
Basic Logging Tutorial
|
||||
|
||||
Module logging, `Loggers <https://docs.python.org/2/library/logging.html#logger-objects>`_
|
||||
Module logging, :ref:`Loggers <logger>`
|
||||
Further documentation on loggers
|
||||
|
||||
.. _topics-logging-from-spiders:
|
||||
@ -165,10 +164,8 @@ possible levels listed in :ref:`topics-logging-levels`.
|
||||
|
||||
:setting:`LOG_FORMAT` and :setting:`LOG_DATEFORMAT` specify formatting strings
|
||||
used as layouts for all messages. Those strings can contain any placeholders
|
||||
listed in `logging's logrecord attributes docs
|
||||
<https://docs.python.org/2/library/logging.html#logrecord-attributes>`_ and
|
||||
`datetime's strftime and strptime directives
|
||||
<https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_
|
||||
listed in :ref:`logging's logrecord attributes docs <logrecord-attributes>` and
|
||||
:ref:`datetime's strftime and strptime directives <strftime-strptime-behavior>`
|
||||
respectively.
|
||||
|
||||
If :setting:`LOG_SHORT_NAMES` is set, then the logs will not display the Scrapy
|
||||
@ -190,7 +187,7 @@ to override some of the Scrapy settings regarding logging.
|
||||
|
||||
.. seealso::
|
||||
|
||||
Module `logging.handlers <https://docs.python.org/2/library/logging.handlers.html>`_
|
||||
Module :mod:`logging.handlers`
|
||||
Further documentation on available handlers
|
||||
|
||||
.. _custom-log-formats:
|
||||
@ -205,6 +202,9 @@ A custom log format can be set for different actions by extending
|
||||
.. autoclass:: scrapy.logformatter.LogFormatter
|
||||
:members:
|
||||
|
||||
|
||||
.. _topics-logging-advanced-customization:
|
||||
|
||||
Advanced customization
|
||||
----------------------
|
||||
|
||||
@ -256,16 +256,15 @@ scrapy.utils.log module
|
||||
In that case, its usage is not required but it's recommended.
|
||||
|
||||
Another option when running custom scripts is to manually configure the logging.
|
||||
To do this you can use `logging.basicConfig()`_ to set a basic root handler.
|
||||
To do this you can use :func:`logging.basicConfig` to set a basic root handler.
|
||||
|
||||
Note that :class:`~scrapy.crawler.CrawlerProcess` automatically calls ``configure_logging``,
|
||||
so it is recommended to only use `logging.basicConfig()`_ together with
|
||||
so it is recommended to only use :func:`logging.basicConfig` together with
|
||||
:class:`~scrapy.crawler.CrawlerRunner`.
|
||||
|
||||
This is an example on how to redirect ``INFO`` or higher messages to a file::
|
||||
|
||||
import logging
|
||||
from scrapy.utils.log import configure_logging
|
||||
|
||||
logging.basicConfig(
|
||||
filename='log.txt',
|
||||
@ -275,7 +274,3 @@ scrapy.utils.log module
|
||||
|
||||
Refer to :ref:`run-from-script` for more details about using Scrapy this
|
||||
way.
|
||||
|
||||
.. _logging.basicConfig(): https://docs.python.org/2/library/logging.html#logging.basicConfig
|
||||
|
||||
|
||||
|
@ -50,7 +50,7 @@ this:
|
||||
4. When the files are downloaded, another field (``files``) will be populated
|
||||
with the results. This field will contain a list of dicts with information
|
||||
about the downloaded files, such as the downloaded path, the original
|
||||
scraped url (taken from the ``file_urls`` field) , and the file checksum.
|
||||
scraped url (taken from the ``file_urls`` field), the file checksum and the file status.
|
||||
The files in the list of the ``files`` field will retain the same order of
|
||||
the original ``file_urls`` field. If some file failed downloading, an
|
||||
error will be logged and the file won't be present in the ``files`` field.
|
||||
@ -201,6 +201,9 @@ For self-hosting you also might feel the need not to use SSL and not to verify S
|
||||
.. _s3.scality: https://s3.scality.com/
|
||||
.. _canned ACLs: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl
|
||||
|
||||
|
||||
.. _media-pipeline-gcs:
|
||||
|
||||
Google Cloud Storage
|
||||
---------------------
|
||||
|
||||
@ -243,20 +246,22 @@ Usage example
|
||||
.. setting:: IMAGES_URLS_FIELD
|
||||
.. setting:: IMAGES_RESULT_FIELD
|
||||
|
||||
In order to use a media pipeline first, :ref:`enable it
|
||||
In order to use a media pipeline, first :ref:`enable it
|
||||
<topics-media-pipeline-enabling>`.
|
||||
|
||||
Then, if a spider returns a dict with the URLs key (``file_urls`` or
|
||||
``image_urls``, for the Files or Images Pipeline respectively), the pipeline will
|
||||
put the results under respective key (``files`` or ``images``).
|
||||
Then, if a spider returns an :ref:`item object <topics-items>` with the URLs
|
||||
field (``file_urls`` or ``image_urls``, for the Files or Images Pipeline
|
||||
respectively), the pipeline will put the results under the respective field
|
||||
(``files`` or ``images``).
|
||||
|
||||
If you prefer to use :class:`~.Item`, then define a custom item with the
|
||||
necessary fields, like in this example for Images Pipeline::
|
||||
When using :ref:`item types <item-types>` for which fields are defined beforehand,
|
||||
you must define both the URLs field and the results field. For example, when
|
||||
using the images pipeline, items must define both the ``image_urls`` and the
|
||||
``images`` field. For instance, using the :class:`~scrapy.item.Item` class::
|
||||
|
||||
import scrapy
|
||||
|
||||
class MyItem(scrapy.Item):
|
||||
|
||||
# ... other item fields ...
|
||||
image_urls = scrapy.Field()
|
||||
images = scrapy.Field()
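
A comparable sketch using a :ref:`dataclass item <dataclass-items>`; the
field names are the pipeline defaults, the rest is illustrative::

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class MyItem:
        # ... other item fields ...
        image_urls: List[str] = field(default_factory=list)
        images: List[dict] = field(default_factory=list)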
|
||||
@ -445,8 +450,11 @@ See here the methods that you can override in your custom Files Pipeline:
|
||||
:meth:`~get_media_requests` method and return a Request for each
|
||||
file URL::
|
||||
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
def get_media_requests(self, item, info):
|
||||
for file_url in item['file_urls']:
|
||||
adapter = ItemAdapter(item)
|
||||
for file_url in adapter['file_urls']:
|
||||
yield scrapy.Request(file_url)
|
||||
|
||||
Those requests will be processed by the pipeline and, when they have finished
|
||||
@ -470,6 +478,18 @@ See here the methods that you can override in your custom Files Pipeline:
|
||||
|
||||
* ``checksum`` - a `MD5 hash`_ of the image contents
|
||||
|
||||
* ``status`` - the file status indication.
|
||||
|
||||
.. versionadded:: 2.2
|
||||
|
||||
It can be one of the following:
|
||||
|
||||
* ``downloaded`` - file was downloaded.
|
||||
* ``uptodate`` - file was not downloaded, as it was downloaded recently,
|
||||
according to the file expiration policy.
|
||||
* ``cached`` - file was already scheduled for download, by another item
|
||||
sharing the same file.
|
||||
|
||||
The list of tuples received by :meth:`~item_completed` is
|
||||
guaranteed to retain the same order of the requests returned from the
|
||||
:meth:`~get_media_requests` method.
|
||||
@ -479,7 +499,8 @@ See here the methods that you can override in your custom Files Pipeline:
|
||||
[(True,
|
||||
{'checksum': '2b00042f7481c7b056c4b410d28f33cf',
|
||||
'path': 'full/0a79c461a4062ac383dc4fade7bc09f1384a3910.jpg',
|
||||
'url': 'http://www.example.com/files/product1.pdf'}),
|
||||
'url': 'http://www.example.com/files/product1.pdf',
|
||||
'status': 'downloaded'}),
|
||||
(False,
|
||||
Failure(...))]
|
||||
|
||||
@ -500,13 +521,15 @@ See here the methods that you can override in your custom Files Pipeline:
|
||||
store the downloaded file paths (passed in results) in the ``file_paths``
|
||||
item field, and we drop the item if it doesn't contain any files::
|
||||
|
||||
from itemadapter import ItemAdapter
|
||||
from scrapy.exceptions import DropItem
|
||||
|
||||
def item_completed(self, results, item, info):
|
||||
file_paths = [x['path'] for ok, x in results if ok]
|
||||
if not file_paths:
|
||||
raise DropItem("Item contains no files")
|
||||
item['file_paths'] = file_paths
|
||||
adapter = ItemAdapter(item)
|
||||
adapter['file_paths'] = file_paths
|
||||
return item
|
||||
|
||||
By default, the :meth:`item_completed` method returns the item.
|
||||
@ -580,8 +603,9 @@ Here is a full example of the Images Pipeline whose methods are exemplified
|
||||
above::
|
||||
|
||||
import scrapy
|
||||
from scrapy.pipelines.images import ImagesPipeline
|
||||
from itemadapter import ItemAdapter
|
||||
from scrapy.exceptions import DropItem
|
||||
from scrapy.pipelines.images import ImagesPipeline
|
||||
|
||||
class MyImagesPipeline(ImagesPipeline):
|
||||
|
||||
@ -593,7 +617,8 @@ above::
|
||||
image_paths = [x['path'] for ok, x in results if ok]
|
||||
if not image_paths:
|
||||
raise DropItem("Item contains no images")
|
||||
item['image_paths'] = image_paths
|
||||
adapter = ItemAdapter(item)
|
||||
adapter['image_paths'] = image_paths
|
||||
return item
|
||||
|
||||
|
||||
|
@ -35,8 +35,9 @@ Here's an example showing how to run a single spider with it.
|
||||
...
|
||||
|
||||
process = CrawlerProcess(settings={
|
||||
'FEED_FORMAT': 'json',
|
||||
'FEED_URI': 'items.json'
|
||||
"FEEDS": {
|
||||
"items.json": {"format": "json"},
|
||||
},
|
||||
})
|
||||
|
||||
process.crawl(MySpider)
|
||||
|
@ -36,7 +36,7 @@ Request objects
|
||||
:type url: string
|
||||
|
||||
:param callback: the function that will be called with the response of this
|
||||
request (once its downloaded) as its first parameter. For more information
|
||||
request (once it's downloaded) as its first parameter. For more information
|
||||
see :ref:`topics-request-response-ref-request-callback-arguments` below.
|
||||
If a Request doesn't specify a callback, the spider's
|
||||
:meth:`~scrapy.spiders.Spider.parse` method will be used.
|
||||
@ -174,9 +174,9 @@ Request objects
|
||||
See :ref:`topics-request-meta` for a list of special meta keys
|
||||
recognized by Scrapy.
|
||||
|
||||
This dict is `shallow copied`_ when the request is cloned using the
|
||||
``copy()`` or ``replace()`` methods, and can also be accessed, in your
|
||||
spider, from the ``response.meta`` attribute.
|
||||
This dict is :doc:`shallow copied <library/copy>` when the request is
|
||||
cloned using the ``copy()`` or ``replace()`` methods, and can also be
|
||||
accessed, in your spider, from the ``response.meta`` attribute.
|
||||
|
||||
.. attribute:: Request.cb_kwargs
|
||||
|
||||
@ -185,11 +185,13 @@ Request objects
|
||||
for new Requests, which means by default callbacks only get a :class:`Response`
|
||||
object as argument.
|
||||
|
||||
This dict is `shallow copied`_ when the request is cloned using the
|
||||
``copy()`` or ``replace()`` methods, and can also be accessed, in your
|
||||
spider, from the ``response.cb_kwargs`` attribute.
|
||||
This dict is :doc:`shallow copied <library/copy>` when the request is
|
||||
cloned using the ``copy()`` or ``replace()`` methods, and can also be
|
||||
accessed, in your spider, from the ``response.cb_kwargs`` attribute.
|
||||
|
||||
.. _shallow copied: https://docs.python.org/2/library/copy.html
|
||||
In case of a failure to process the request, this dict can be accessed as
|
||||
``failure.request.cb_kwargs`` in the request's errback. For more information,
|
||||
see :ref:`errback-cb_kwargs`.
|
||||
|
||||
.. method:: Request.copy()
|
||||
|
||||
@ -314,6 +316,31 @@ errors if needed::
|
||||
request = failure.request
|
||||
self.logger.error('TimeoutError on %s', request.url)
|
||||
|
||||
.. _errback-cb_kwargs:
|
||||
|
||||
Accessing additional data in errback functions
|
||||
----------------------------------------------
|
||||
|
||||
In case of a failure to process the request, you may be interested in
|
||||
accessing arguments to the callback functions so you can process further
|
||||
based on the arguments in the errback. The following example shows how to
|
||||
achieve this by using ``Failure.request.cb_kwargs``::
|
||||
|
||||
def parse(self, response):
|
||||
request = scrapy.Request('http://www.example.com/index.html',
|
||||
callback=self.parse_page2,
|
||||
errback=self.errback_page2,
|
||||
cb_kwargs=dict(main_url=response.url))
|
||||
yield request
|
||||
|
||||
def parse_page2(self, response, main_url):
|
||||
pass
|
||||
|
||||
def errback_page2(self, failure):
|
||||
yield dict(
|
||||
main_url=failure.request.cb_kwargs['main_url'],
|
||||
)
|
||||
|
||||
.. _topics-request-meta:
|
||||
|
||||
Request.meta special keys
|
||||
@ -387,6 +414,51 @@ The meta key is used set retry times per request. When initialized, the
|
||||
:reqmeta:`max_retry_times` meta key takes higher precedence over the
|
||||
:setting:`RETRY_TIMES` setting.
|
||||
|
||||
|
||||
.. _topics-stop-response-download:
|
||||
|
||||
Stopping the download of a Response
|
||||
===================================
|
||||
|
||||
Raising a :exc:`~scrapy.exceptions.StopDownload` exception from a
|
||||
:class:`~scrapy.signals.bytes_received` signal handler will stop the
|
||||
download of a given response. See the following example::
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class StopSpider(scrapy.Spider):
|
||||
name = "stop"
|
||||
start_urls = ["https://docs.scrapy.org/en/latest/"]
|
||||
|
||||
@classmethod
|
||||
def from_crawler(cls, crawler):
|
||||
spider = super().from_crawler(crawler)
|
||||
crawler.signals.connect(spider.on_bytes_received, signal=scrapy.signals.bytes_received)
|
||||
return spider
|
||||
|
||||
def parse(self, response):
|
||||
# 'last_chars' show that the full response was not downloaded
|
||||
yield {"len": len(response.text), "last_chars": response.text[-40:]}
|
||||
|
||||
def on_bytes_received(self, data, request, spider):
|
||||
raise scrapy.exceptions.StopDownload(fail=False)
|
||||
|
||||
which produces the following output::
|
||||
|
||||
2020-05-19 17:26:12 [scrapy.core.engine] INFO: Spider opened
|
||||
2020-05-19 17:26:12 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
|
||||
2020-05-19 17:26:13 [scrapy.core.downloader.handlers.http11] DEBUG: Download stopped for <GET https://docs.scrapy.org/en/latest/> from signal handler StopSpider.on_bytes_received
|
||||
2020-05-19 17:26:13 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://docs.scrapy.org/en/latest/> (referer: None) ['download_stopped']
|
||||
2020-05-19 17:26:13 [scrapy.core.scraper] DEBUG: Scraped from <200 https://docs.scrapy.org/en/latest/>
|
||||
{'len': 279, 'last_chars': 'dth, initial-scale=1.0">\n \n <title>Scr'}
|
||||
2020-05-19 17:26:13 [scrapy.core.engine] INFO: Closing spider (finished)
|
||||
|
||||
By default, resulting responses are handled by their corresponding errbacks. To
|
||||
call their callback instead, like in this example, pass ``fail=False`` to the
|
||||
:exc:`~scrapy.exceptions.StopDownload` exception.
|
||||
|
||||
|
||||
.. _topics-request-response-ref-request-subclasses:
|
||||
|
||||
Request subclasses
|
||||
@ -566,12 +638,10 @@ dealing with JSON requests.
|
||||
set to ``'POST'`` automatically.
|
||||
:type data: JSON serializable object
|
||||
|
||||
:param dumps_kwargs: Parameters that will be passed to underlying `json.dumps`_ method which is used to serialize
|
||||
:param dumps_kwargs: Parameters that will be passed to underlying :func:`json.dumps` method which is used to serialize
|
||||
data into JSON format.
|
||||
:type dumps_kwargs: dict
|
||||
|
||||
.. _json.dumps: https://docs.python.org/3/library/json.html#json.dumps
|
||||
|
||||
JsonRequest usage example
|
||||
-------------------------
|
||||
|
||||
@ -620,6 +690,12 @@ Response objects
|
||||
:param certificate: an object representing the server's SSL certificate.
|
||||
:type certificate: twisted.internet.ssl.Certificate
|
||||
|
||||
:param ip_address: The IP address of the server from which the Response originated.
|
||||
:type ip_address: :class:`ipaddress.IPv4Address` or :class:`ipaddress.IPv6Address`
|
||||
|
||||
.. versionadded:: 2.1.0
|
||||
The ``ip_address`` parameter.
|
||||
|
||||
.. attribute:: Response.url
|
||||
|
||||
A string containing the URL of the response.
|
||||
@ -709,6 +785,16 @@ Response objects
|
||||
|
||||
Only populated for ``https`` responses, ``None`` otherwise.
|
||||
|
||||
.. attribute:: Response.ip_address
|
||||
|
||||
.. versionadded:: 2.1.0
|
||||
|
||||
The IP address of the server from which the Response originated.
|
||||
|
||||
This attribute is currently only populated by the HTTP 1.1 download
|
||||
handler, i.e. for ``http(s)`` responses. For other handlers,
|
||||
:attr:`ip_address` is always ``None``.
|
||||
|
||||
.. method:: Response.copy()
|
||||
|
||||
Returns a new Response which is a copy of this Response.
|
||||
@ -724,18 +810,16 @@ Response objects
|
||||
Constructs an absolute url by combining the Response's :attr:`url` with
|
||||
a possible relative url.
|
||||
|
||||
This is a wrapper over `urlparse.urljoin`_, it's merely an alias for
|
||||
This is a wrapper over :func:`~urllib.parse.urljoin`, it's merely an alias for
|
||||
making this call::
|
||||
|
||||
urlparse.urljoin(response.url, url)
|
||||
urllib.parse.urljoin(response.url, url)
|
||||
|
||||
.. automethod:: Response.follow
|
||||
|
||||
.. automethod:: Response.follow_all
|
||||
|
||||
|
||||
.. _urlparse.urljoin: https://docs.python.org/2/library/urlparse.html#urlparse.urljoin
|
||||
|
||||
.. _topics-request-response-ref-response-subclasses:
|
||||
|
||||
Response subclasses
|
||||
@ -824,10 +908,10 @@ TextResponse objects
|
||||
|
||||
.. automethod:: TextResponse.follow_all
|
||||
|
||||
.. method:: TextResponse.body_as_unicode()
|
||||
.. automethod:: TextResponse.json()
|
||||
|
||||
The same as :attr:`text`, but available as a method. This method is
|
||||
kept for backward compatibility; please prefer ``response.text``.
|
||||
Returns a Python object from the deserialized JSON document.
|
||||
The result is cached after the first call.
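
For example, assuming a response whose body is the JSON document
``{"name": "Color TV", "price": 1200}``, something like this could be
expected::

    >>> response.json()
    {'name': 'Color TV', 'price': 1200}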
|
||||
|
||||
|
||||
HtmlResponse objects
|
||||
|
@ -14,7 +14,7 @@ achieve this, such as:
|
||||
drawback: it's slow.
|
||||
|
||||
* `lxml`_ is an XML parsing library (which also parses HTML) with a pythonic
|
||||
API based on `ElementTree`_. (lxml is not part of the Python standard
|
||||
API based on :mod:`~xml.etree.ElementTree`. (lxml is not part of the Python standard
|
||||
library.)
|
||||
|
||||
Scrapy comes with its own mechanism for extracting data. They're called
|
||||
@ -36,7 +36,6 @@ defines selectors to associate those styles with specific HTML elements.
|
||||
|
||||
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/
|
||||
.. _lxml: https://lxml.de/
|
||||
.. _ElementTree: https://docs.python.org/2/library/xml.etree.elementtree.html
|
||||
.. _XPath: https://www.w3.org/TR/xpath/all/
|
||||
.. _CSS: https://www.w3.org/TR/selectors
|
||||
.. _parsel: https://parsel.readthedocs.io/en/latest/
|
||||
|
@ -26,9 +26,7 @@ do this by using an environment variable, ``SCRAPY_SETTINGS_MODULE``.
|
||||
|
||||
The value of ``SCRAPY_SETTINGS_MODULE`` should be in Python path syntax, e.g.
|
||||
``myproject.settings``. Note that the settings module should be on the
|
||||
Python `import search path`_.
|
||||
|
||||
.. _import search path: https://docs.python.org/2/tutorial/modules.html#the-module-search-path
|
||||
Python :ref:`import search path <tut-searchpath>`.
|
||||
|
||||
.. _populating-settings:
|
||||
|
||||
@ -238,8 +236,8 @@ CONCURRENT_ITEMS
|
||||
|
||||
Default: ``100``
|
||||
|
||||
Maximum number of concurrent items (per response) to process in parallel in the
|
||||
Item Processor (also known as the :ref:`Item Pipeline <topics-item-pipeline>`).
|
||||
Maximum number of concurrent items (per response) to process in parallel in
|
||||
:ref:`item pipelines <topics-item-pipeline>`.
|
||||
|
||||
.. setting:: CONCURRENT_REQUESTS
|
||||
|
||||
@ -422,10 +420,9 @@ connections (for ``HTTP10DownloadHandler``).
|
||||
.. note::
|
||||
|
||||
HTTP/1.0 is rarely used nowadays so you can safely ignore this setting,
|
||||
unless you use Twisted<11.1, or if you really want to use HTTP/1.0
|
||||
and override :setting:`DOWNLOAD_HANDLERS_BASE` for ``http(s)`` scheme
|
||||
accordingly, i.e. to
|
||||
``'scrapy.core.downloader.handlers.http.HTTP10DownloadHandler'``.
|
||||
unless you really want to use HTTP/1.0 and override
|
||||
:setting:`DOWNLOAD_HANDLERS` for ``http(s)`` scheme accordingly,
|
||||
i.e. to ``'scrapy.core.downloader.handlers.http.HTTP10DownloadHandler'``.
|
||||
|
||||
.. setting:: DOWNLOADER_CLIENTCONTEXTFACTORY
|
||||
|
||||
@ -449,7 +446,6 @@ or even enable client-side authentication (and various other things).
|
||||
Scrapy also has another context factory class that you can set,
|
||||
``'scrapy.core.downloader.contextfactory.BrowserLikeContextFactory'``,
|
||||
which uses the platform's certificates to validate remote endpoints.
|
||||
**This is only available if you use Twisted>=14.0.**
|
||||
|
||||
If you do use a custom ContextFactory, make sure its ``__init__`` method
|
||||
accepts a ``method`` parameter (this is the ``OpenSSL.SSL`` method mapping
|
||||
@ -473,7 +469,7 @@ necessary to access certain HTTPS websites: for example, you may need to use
|
||||
``'DEFAULT:!DH'`` for a website with weak DH parameters or enable a
|
||||
specific cipher that is not included in ``DEFAULT`` if a website requires it.
|
||||
|
||||
.. _OpenSSL cipher list format: https://www.openssl.org/docs/manmaster/man1/ciphers.html#CIPHER-LIST-FORMAT
|
||||
.. _OpenSSL cipher list format: https://www.openssl.org/docs/manmaster/man1/openssl-ciphers.html#CIPHER-LIST-FORMAT
|
||||
|
||||
.. setting:: DOWNLOADER_CLIENT_TLS_METHOD
|
||||
|
||||
@ -496,10 +492,6 @@ This setting must be one of these string values:
|
||||
- ``'TLSv1.2'``: forces TLS version 1.2
|
||||
- ``'SSLv3'``: forces SSL version 3 (**not recommended**)
|
||||
|
||||
.. note::
|
||||
|
||||
We recommend that you use PyOpenSSL>=0.13 and Twisted>=0.13
|
||||
or above (Twisted>=14.0 if you can).
|
||||
|
||||
.. setting:: DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING
|
||||
|
||||
@ -662,8 +654,6 @@ If you want to disable it set to 0.
|
||||
spider attribute and per-request using :reqmeta:`download_maxsize`
|
||||
Request.meta key.
|
||||
|
||||
This feature needs Twisted >= 11.1.
|
||||
|
||||
.. setting:: DOWNLOAD_WARNSIZE
|
||||
|
||||
DOWNLOAD_WARNSIZE
|
||||
@ -681,8 +671,6 @@ If you want to disable it set to 0.
|
||||
spider attribute and per-request using :reqmeta:`download_warnsize`
|
||||
Request.meta key.
|
||||
|
||||
This feature needs Twisted >= 11.1.
|
||||
|
||||
.. setting:: DOWNLOAD_FAIL_ON_DATALOSS
|
||||
|
||||
DOWNLOAD_FAIL_ON_DATALOSS
|
||||
@ -899,10 +887,9 @@ LOG_FORMAT
|
||||
|
||||
Default: ``'%(asctime)s [%(name)s] %(levelname)s: %(message)s'``
|
||||
|
||||
String for formatting log messages. Refer to the `Python logging documentation`_ for the whole list of available
|
||||
placeholders.
|
||||
|
||||
.. _Python logging documentation: https://docs.python.org/2/library/logging.html#logrecord-attributes
|
||||
String for formatting log messages. Refer to the
|
||||
:ref:`Python logging documentation <logrecord-attributes>` for the whole
|
||||
list of available placeholders.
|
||||
|
||||
.. setting:: LOG_DATEFORMAT
|
||||
|
||||
@ -912,10 +899,9 @@ LOG_DATEFORMAT
|
||||
Default: ``'%Y-%m-%d %H:%M:%S'``
|
||||
|
||||
String for formatting date/time, expansion of the ``%(asctime)s`` placeholder
|
||||
in :setting:`LOG_FORMAT`. Refer to the `Python datetime documentation`_ for the whole list of available
|
||||
directives.
|
||||
|
||||
.. _Python datetime documentation: https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
|
||||
in :setting:`LOG_FORMAT`. Refer to the
|
||||
:ref:`Python datetime documentation <strftime-strptime-behavior>` for the
|
||||
whole list of available directives.
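
For instance, a hypothetical ``settings.py`` tweak that shortens both
formats::

    LOG_FORMAT = '%(asctime)s %(levelname)s: %(message)s'
    LOG_DATEFORMAT = '%H:%M:%S'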
|
||||
|
||||
.. setting:: LOG_FORMATTER
|
||||
|
||||
@ -1116,17 +1102,6 @@ multi-purpose thread pool used by various Scrapy components. Threaded
|
||||
DNS Resolver, BlockingFeedStorage, S3FilesStore just to name a few. Increase
|
||||
this value if you're experiencing problems with insufficient blocking IO.
|
||||
|
||||
.. setting:: REDIRECT_MAX_TIMES
|
||||
|
||||
REDIRECT_MAX_TIMES
|
||||
------------------
|
||||
|
||||
Default: ``20``
|
||||
|
||||
Defines the maximum times a request can be redirected. After this maximum the
|
||||
request's response is returned as is. We used Firefox default value for the
|
||||
same task.
|
||||
|
||||
.. setting:: REDIRECT_PRIORITY_ADJUST
|
||||
|
||||
REDIRECT_PRIORITY_ADJUST
|
||||
@ -1422,17 +1397,6 @@ Default: ``True``
|
||||
A boolean which specifies if the :ref:`telnet console <topics-telnetconsole>`
|
||||
will be enabled (provided its extension is also enabled).
|
||||
|
||||
.. setting:: TELNETCONSOLE_PORT
|
||||
|
||||
TELNETCONSOLE_PORT
|
||||
------------------
|
||||
|
||||
Default: ``[6023, 6073]``
|
||||
|
||||
The port range to use for the telnet console. If set to ``None`` or ``0``, a
|
||||
dynamically assigned port is used. For more info see
|
||||
:ref:`topics-telnetconsole`.
|
||||
|
||||
.. setting:: TEMPLATES_DIR
|
||||
|
||||
TEMPLATES_DIR
|
||||
|
@ -156,6 +156,17 @@ First, we launch the shell::
|
||||
|
||||
scrapy shell 'https://scrapy.org' --nolog
|
||||
|
||||
.. note::
|
||||
|
||||
Remember to always enclose URLs in quotes when running the Scrapy shell from
|
||||
the command line, otherwise URLs containing arguments (i.e. the ``&`` character)
|
||||
will not work.
|
||||
|
||||
On Windows, use double quotes instead::
|
||||
|
||||
scrapy shell "https://scrapy.org" --nolog
|
||||
|
||||
|
||||
Then, the shell fetches the URL (using the Scrapy downloader) and prints the
|
||||
list of available objects and useful shortcuts (you'll notice that these lines
|
||||
all start with the ``[s]`` prefix)::
|
||||
|
@ -16,8 +16,7 @@ deliver the arguments that the handler receives.
|
||||
You can connect to signals (or send your own) through the
|
||||
:ref:`topics-api-signals`.
|
||||
|
||||
Here is a simple example showing how you can catch signals and perform some action:
|
||||
::
|
||||
Here is a simple example showing how you can catch signals and perform some action::
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy import Spider
|
||||
@ -52,9 +51,45 @@ Deferred signal handlers
|
||||
========================
|
||||
|
||||
Some signals support returning :class:`~twisted.internet.defer.Deferred`
|
||||
objects from their handlers, see the :ref:`topics-signals-ref` below to know
|
||||
which ones.
|
||||
objects from their handlers, allowing you to run asynchronous code that
|
||||
does not block Scrapy. If a signal handler returns a
|
||||
:class:`~twisted.internet.defer.Deferred`, Scrapy waits for that
|
||||
:class:`~twisted.internet.defer.Deferred` to fire.
|
||||
|
||||
Let's take an example::
|
||||
|
||||
class SignalSpider(scrapy.Spider):
|
||||
name = 'signals'
|
||||
start_urls = ['http://quotes.toscrape.com/page/1/']
|
||||
|
||||
@classmethod
|
||||
def from_crawler(cls, crawler, *args, **kwargs):
|
||||
spider = super(SignalSpider, cls).from_crawler(crawler, *args, **kwargs)
|
||||
crawler.signals.connect(spider.item_scraped, signal=signals.item_scraped)
|
||||
return spider
|
||||
|
||||
def item_scraped(self, item):
|
||||
# Send the scraped item to the server
|
||||
d = treq.post(
|
||||
'http://example.com/post',
|
||||
json.dumps(item).encode('ascii'),
|
||||
headers={b'Content-Type': [b'application/json']}
|
||||
)
|
||||
|
||||
# The next item will be scraped only after
|
||||
# deferred (d) is fired
|
||||
return d
|
||||
|
||||
def parse(self, response):
|
||||
for quote in response.css('div.quote'):
|
||||
yield {
|
||||
'text': quote.css('span.text::text').get(),
|
||||
'author': quote.css('small.author::text').get(),
|
||||
'tags': quote.css('div.tags a.tag::text').getall(),
|
||||
}
|
||||
|
||||
See the :ref:`topics-signals-ref` below to know which signals support
|
||||
:class:`~twisted.internet.defer.Deferred`.
|
||||
|
||||
.. _topics-signals-ref:
|
||||
|
||||
@ -66,22 +101,25 @@ Built-in signals reference
|
||||
|
||||
Here's the list of Scrapy built-in signals and their meaning.
|
||||
|
||||
engine_started
|
||||
Engine signals
|
||||
--------------
|
||||
|
||||
engine_started
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: engine_started
|
||||
.. function:: engine_started()
|
||||
|
||||
Sent when the Scrapy engine has started crawling.
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
.. note:: This signal may be fired *after* the :signal:`spider_opened` signal,
|
||||
depending on how the spider was started. So **don't** rely on this signal
|
||||
getting fired before :signal:`spider_opened`.
|
||||
|
||||
engine_stopped
|
||||
--------------
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: engine_stopped
|
||||
.. function:: engine_stopped()
|
||||
@ -89,10 +127,21 @@ engine_stopped
|
||||
Sent when the Scrapy engine is stopped (for example, when a crawling
|
||||
process has finished).
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
Item signals
|
||||
------------
|
||||
|
||||
.. note::
|
||||
Since at most :setting:`CONCURRENT_ITEMS` items are processed in
|
||||
parallel, many deferreds are fired together using
|
||||
:class:`~twisted.internet.defer.DeferredList`. Hence the next
|
||||
batch waits for the :class:`~twisted.internet.defer.DeferredList`
|
||||
to fire and then runs the respective item signal handler for
|
||||
the next batch of scraped items.
|
||||
|
||||
item_scraped
|
||||
------------
|
||||
~~~~~~~~~~~~
|
||||
|
||||
.. signal:: item_scraped
|
||||
.. function:: item_scraped(item, response, spider)
|
||||
@ -100,10 +149,10 @@ item_scraped
|
||||
Sent when an item has been scraped, after it has passed all the
|
||||
:ref:`topics-item-pipeline` stages (without being dropped).
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
:param item: the item scraped
|
||||
:type item: dict or :class:`~scrapy.item.Item` object
|
||||
:param item: the scraped item
|
||||
:type item: :ref:`item object <item-types>`
|
||||
|
||||
:param spider: the spider which scraped the item
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
@ -112,7 +161,7 @@ item_scraped
|
||||
:type response: :class:`~scrapy.http.Response` object
|
||||
|
||||
item_dropped
|
||||
------------
|
||||
~~~~~~~~~~~~
|
||||
|
||||
.. signal:: item_dropped
|
||||
.. function:: item_dropped(item, response, exception, spider)
|
||||
@ -120,10 +169,10 @@ item_dropped
|
||||
Sent after an item has been dropped from the :ref:`topics-item-pipeline`
|
||||
when some stage raised a :exc:`~scrapy.exceptions.DropItem` exception.
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
:param item: the item dropped from the :ref:`topics-item-pipeline`
|
||||
:type item: dict or :class:`~scrapy.item.Item` object
|
||||
:type item: :ref:`item object <item-types>`
|
||||
|
||||
:param spider: the spider which scraped the item
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
@ -137,7 +186,7 @@ item_dropped
|
||||
:type exception: :exc:`~scrapy.exceptions.DropItem` exception
|
||||
|
||||
item_error
|
||||
------------
|
||||
~~~~~~~~~~
|
||||
|
||||
.. signal:: item_error
|
||||
.. function:: item_error(item, response, spider, failure)
|
||||
@ -145,10 +194,10 @@ item_error
|
||||
Sent when a :ref:`topics-item-pipeline` generates an error (i.e. raises
|
||||
an exception), except :exc:`~scrapy.exceptions.DropItem` exception.
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
:param item: the item dropped from the :ref:`topics-item-pipeline`
|
||||
:type item: dict or :class:`~scrapy.item.Item` object
|
||||
:param item: the item that caused the error in the :ref:`topics-item-pipeline`
|
||||
:type item: :ref:`item object <item-types>`
|
||||
|
||||
:param response: the response being processed when the exception was raised
|
||||
:type response: :class:`~scrapy.http.Response` object
|
||||
@ -159,8 +208,11 @@ item_error
|
||||
:param failure: the exception raised
|
||||
:type failure: twisted.python.failure.Failure
|
||||
|
||||
Spider signals
|
||||
--------------
|
||||
|
||||
spider_closed
|
||||
-------------
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: spider_closed
|
||||
.. function:: spider_closed(spider, reason)
|
||||
@ -168,7 +220,7 @@ spider_closed
|
||||
Sent after a spider has been closed. This can be used to release per-spider
|
||||
resources reserved on :signal:`spider_opened`.
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
:param spider: the spider which has been closed
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
@ -183,7 +235,7 @@ spider_closed
|
||||
:type reason: str
|
||||
|
||||
spider_opened
|
||||
-------------
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: spider_opened
|
||||
.. function:: spider_opened(spider)
|
||||
@ -192,13 +244,13 @@ spider_opened
|
||||
reserve per-spider resources, but can be used for any task that needs to be
|
||||
performed when a spider is opened.
|
||||
|
||||
This signal supports returning deferreds from their handlers.
|
||||
This signal supports returning deferreds from its handlers.
|
||||
|
||||
:param spider: the spider which has been opened
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
spider_idle
|
||||
-----------
|
||||
~~~~~~~~~~~
|
||||
|
||||
.. signal:: spider_idle
|
||||
.. function:: spider_idle(spider)
|
||||
@ -216,7 +268,7 @@ spider_idle
|
||||
You may raise a :exc:`~scrapy.exceptions.DontCloseSpider` exception to
|
||||
prevent the spider from being closed.
|
||||
|
||||
This signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param spider: the spider which has gone idle
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
@ -228,14 +280,14 @@ spider_idle
|
||||
due to duplication).
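As an illustration only (the spider name and URL are made up), a spider can connect to this signal and raise :exc:`~scrapy.exceptions.DontCloseSpider` to stay open::

    import scrapy
    from scrapy import signals
    from scrapy.exceptions import DontCloseSpider


    class KeepAliveSpider(scrapy.Spider):
        name = 'keepalive'
        start_urls = ['http://quotes.toscrape.com/']

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            crawler.signals.connect(spider.spider_idle, signal=signals.spider_idle)
            return spider

        def spider_idle(self, spider):
            # In practice you would usually schedule more requests here first;
            # raising DontCloseSpider keeps the spider open, and this handler
            # runs again each time the spider goes idle.
            raise DontCloseSpider

        def parse(self, response):
            pass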
|
||||
|
||||
spider_error
|
||||
------------
|
||||
~~~~~~~~~~~~
|
||||
|
||||
.. signal:: spider_error
|
||||
.. function:: spider_error(failure, response, spider)
|
||||
|
||||
Sent when a spider callback generates an error (i.e. raises an exception).
|
||||
|
||||
This signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param failure: the exception raised
|
||||
:type failure: twisted.python.failure.Failure
|
||||
@ -246,8 +298,11 @@ spider_error
|
||||
:param spider: the spider which raised the exception
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
Request signals
|
||||
---------------
|
||||
|
||||
request_scheduled
|
||||
-----------------
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: request_scheduled
|
||||
.. function:: request_scheduled(request, spider)
|
||||
@ -255,7 +310,7 @@ request_scheduled
|
||||
Sent when the engine schedules a :class:`~scrapy.http.Request`, to be
|
||||
downloaded later.
|
||||
|
||||
The signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param request: the request that reached the scheduler
|
||||
:type request: :class:`~scrapy.http.Request` object
|
||||
@ -264,7 +319,7 @@ request_scheduled
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
request_dropped
|
||||
---------------
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: request_dropped
|
||||
.. function:: request_dropped(request, spider)
|
||||
@ -272,7 +327,7 @@ request_dropped
|
||||
Sent when a :class:`~scrapy.http.Request`, scheduled by the engine to be
|
||||
downloaded later, is rejected by the scheduler.
|
||||
|
||||
The signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param request: the request that reached the scheduler
|
||||
:type request: :class:`~scrapy.http.Request` object
|
||||
@ -281,14 +336,14 @@ request_dropped
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
request_reached_downloader
|
||||
---------------------------
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: request_reached_downloader
|
||||
.. function:: request_reached_downloader(request, spider)
|
||||
|
||||
Sent when a :class:`~scrapy.http.Request` reaches the downloader.
|
||||
|
||||
The signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param request: the request that reached the downloader
|
||||
:type request: :class:`~scrapy.http.Request` object
|
||||
@ -297,7 +352,7 @@ request_reached_downloader
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
request_left_downloader
|
||||
-----------------------
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: request_left_downloader
|
||||
.. function:: request_left_downloader(request, spider)
|
||||
@ -315,8 +370,41 @@ request_left_downloader
|
||||
:param spider: the spider that yielded the request
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
bytes_received
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
.. versionadded:: 2.2
|
||||
|
||||
.. signal:: bytes_received
|
||||
.. function:: bytes_received(data, request, spider)
|
||||
|
||||
Sent by the HTTP 1.1 and S3 download handlers when a group of bytes is
|
||||
received for a specific request. This signal might be fired multiple
|
||||
times for the same request, with partial data each time. For instance,
|
||||
a possible scenario for a 25 kb response would be two signals fired
|
||||
with 10 kb of data, and a final one with 5 kb of data.
|
||||
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param data: the data received by the download handler
|
||||
:type data: :class:`bytes` object
|
||||
|
||||
:param request: the request that generated the download
|
||||
:type request: :class:`~scrapy.http.Request` object
|
||||
|
||||
:param spider: the spider associated with the response
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
.. note:: Handlers of this signal can stop the download of a response while it
|
||||
is in progress by raising the :exc:`~scrapy.exceptions.StopDownload`
|
||||
exception. Please refer to the :ref:`topics-stop-response-download` topic
|
||||
for additional information and examples.
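As a hedged sketch (the spider name and URL are placeholders), a spider could stop each download after the first chunk of bytes and, by raising ``StopDownload(fail=False)``, still have the partially downloaded response delivered to the request callback as described in :ref:`topics-stop-response-download`::

    import scrapy
    from scrapy import signals
    from scrapy.exceptions import StopDownload


    class FirstBytesSpider(scrapy.Spider):
        name = 'first-bytes'
        start_urls = ['http://quotes.toscrape.com/']

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            crawler.signals.connect(spider.on_bytes_received, signal=signals.bytes_received)
            return spider

        def on_bytes_received(self, data, request, spider):
            # Stop after the first chunk of bytes; fail=False means the
            # partial response is still passed to the request callback.
            raise StopDownload(fail=False)

        def parse(self, response):
            self.logger.info('Got %d bytes for %s', len(response.body), response.url)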
|
||||
|
||||
Response signals
|
||||
----------------
|
||||
|
||||
response_received
|
||||
-----------------
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: response_received
|
||||
.. function:: response_received(response, request, spider)
|
||||
@ -324,7 +412,7 @@ response_received
|
||||
Sent when the engine receives a new :class:`~scrapy.http.Response` from the
|
||||
downloader.
|
||||
|
||||
This signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param response: the response received
|
||||
:type response: :class:`~scrapy.http.Response` object
|
||||
@ -336,14 +424,14 @@ response_received
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
response_downloaded
|
||||
-------------------
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. signal:: response_downloaded
|
||||
.. function:: response_downloaded(response, request, spider)
|
||||
|
||||
Sent by the downloader right after a ``HTTPResponse`` is downloaded.
|
||||
|
||||
This signal does not support returning deferreds from their handlers.
|
||||
This signal does not support returning deferreds from its handlers.
|
||||
|
||||
:param response: the response downloaded
|
||||
:type response: :class:`~scrapy.http.Response` object
|
||||
|
@ -102,29 +102,28 @@ object gives you access, for example, to the :ref:`settings <topics-settings>`.
|
||||
it has processed the response.
|
||||
|
||||
:meth:`process_spider_output` must return an iterable of
|
||||
:class:`~scrapy.http.Request`, dict or :class:`~scrapy.item.Item`
|
||||
objects.
|
||||
:class:`~scrapy.http.Request` objects and :ref:`item object
|
||||
<topics-items>`.
|
||||
|
||||
:param response: the response which generated this output from the
|
||||
spider
|
||||
:type response: :class:`~scrapy.http.Response` object
|
||||
|
||||
:param result: the result returned by the spider
|
||||
:type result: an iterable of :class:`~scrapy.http.Request`, dict
|
||||
or :class:`~scrapy.item.Item` objects
|
||||
:type result: an iterable of :class:`~scrapy.http.Request` objects and
|
||||
:ref:`item object <topics-items>`
|
||||
|
||||
:param spider: the spider whose result is being processed
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
|
||||
|
||||
|
||||
.. method:: process_spider_exception(response, exception, spider)
|
||||
|
||||
This method is called when a spider or :meth:`process_spider_output`
|
||||
method (from a previous spider middleware) raises an exception.
|
||||
|
||||
:meth:`process_spider_exception` should return either ``None`` or an
|
||||
iterable of :class:`~scrapy.http.Request`, dict or
|
||||
:class:`~scrapy.item.Item` objects.
|
||||
iterable of :class:`~scrapy.http.Request` objects and :ref:`item object
|
||||
<topics-items>`.
|
||||
|
||||
If it returns ``None``, Scrapy will continue processing this exception,
|
||||
executing any other :meth:`process_spider_exception` in the following
|
||||
@ -140,7 +139,7 @@ object gives you access, for example, to the :ref:`settings <topics-settings>`.
|
||||
:type response: :class:`~scrapy.http.Response` object
|
||||
|
||||
:param exception: the exception raised
|
||||
:type exception: `Exception`_ object
|
||||
:type exception: :exc:`Exception` object
|
||||
|
||||
:param spider: the spider which raised the exception
|
||||
:type spider: :class:`~scrapy.spiders.Spider` object
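To tie these two hooks together, here is a minimal, illustrative middleware sketch (the class name and the ``title`` check are invented for this example; it is not a built-in middleware)::

    import logging

    logger = logging.getLogger(__name__)


    class DropIncompleteItemsMiddleware:

        def process_spider_output(self, response, result, spider):
            for x in result:
                # Drop dict items missing a 'title' field; requests and
                # other item types pass through untouched.
                if isinstance(x, dict) and not x.get('title'):
                    continue
                yield x

        def process_spider_exception(self, response, exception, spider):
            # Log the error and return an empty iterable, so the exception
            # is considered handled and processing continues.
            logger.warning('Callback failed for %s: %s', response.url, exception)
            return []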
|
||||
@ -183,10 +182,6 @@ object gives you access, for example, to the :ref:`settings <topics-settings>`.
|
||||
:param crawler: crawler that uses this middleware
|
||||
:type crawler: :class:`~scrapy.crawler.Crawler` object
|
||||
|
||||
|
||||
.. _Exception: https://docs.python.org/2/library/exceptions.html#exceptions.Exception
|
||||
|
||||
|
||||
.. _topics-spider-middleware-ref:
|
||||
|
||||
Built-in spider middleware reference
|
||||
|
@ -23,8 +23,8 @@ For spiders, the scraping cycle goes through something like this:
|
||||
:attr:`~scrapy.spiders.Spider.parse` method as callback function for the
|
||||
Requests.
|
||||
|
||||
2. In the callback function, you parse the response (web page) and return either
|
||||
dicts with extracted data, :class:`~scrapy.item.Item` objects,
|
||||
2. In the callback function, you parse the response (web page) and return
|
||||
:ref:`item objects <topics-items>`,
|
||||
:class:`~scrapy.http.Request` objects, or an iterable of these objects.
|
||||
Those Requests will also contain a callback (maybe
|
||||
the same) and will then be downloaded by Scrapy and then their
|
||||
@ -121,7 +121,7 @@ scrapy.Spider
|
||||
send log messages through it as described on
|
||||
:ref:`topics-logging-from-spiders`.
|
||||
|
||||
.. method:: from_crawler(crawler, \*args, \**kwargs)
|
||||
.. method:: from_crawler(crawler, *args, **kwargs)
|
||||
|
||||
This is the class method used by Scrapy to create your spiders.
|
||||
|
||||
@ -179,8 +179,8 @@ scrapy.Spider
|
||||
the same requirements as the :class:`Spider` class.
|
||||
|
||||
This method, as well as any other Request callback, must return an
|
||||
iterable of :class:`~scrapy.http.Request` and/or
|
||||
dicts or :class:`~scrapy.item.Item` objects.
|
||||
iterable of :class:`~scrapy.http.Request` and/or :ref:`item objects
|
||||
<topics-items>`.
|
||||
|
||||
:param response: the response to parse
|
||||
:type response: :class:`~scrapy.http.Response`
|
||||
@ -234,7 +234,7 @@ Return multiple Requests and items from a single callback::
|
||||
yield scrapy.Request(response.urljoin(href), self.parse)
|
||||
|
||||
Instead of :attr:`~.start_urls` you can use :meth:`~.start_requests` directly;
|
||||
to give data more structure you can use :ref:`topics-items`::
|
||||
to give data more structure you can use :class:`~scrapy.item.Item` objects::
|
||||
|
||||
import scrapy
|
||||
from myproject.items import MyItem
|
||||
@ -298,9 +298,7 @@ Keep in mind that spider arguments are only strings.
|
||||
The spider will not do any parsing on its own.
|
||||
If you were to set the ``start_urls`` attribute from the command line,
|
||||
you would have to parse it on your own into a list
|
||||
using something like
|
||||
`ast.literal_eval <https://docs.python.org/3/library/ast.html#ast.literal_eval>`_
|
||||
or `json.loads <https://docs.python.org/3/library/json.html#json.loads>`_
|
||||
using something like :func:`ast.literal_eval` or :func:`json.loads`
|
||||
and then set it as an attribute.
|
||||
Otherwise, you would cause iteration over a ``start_urls`` string
|
||||
(a very common python pitfall)
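For example, such parsing could be done in the spider's ``__init__`` (a sketch; the spider name and argument format are illustrative)::

    import ast

    import scrapy


    class QuotesSpider(scrapy.Spider):
        name = 'quotes'

        def __init__(self, start_urls=None, *args, **kwargs):
            super().__init__(*args, **kwargs)
            if start_urls:
                # e.g. scrapy crawl quotes -a start_urls="['http://quotes.toscrape.com/page/1/']"
                self.start_urls = ast.literal_eval(start_urls)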
|
||||
@ -366,7 +364,7 @@ CrawlSpider
|
||||
|
||||
This method is called for the start_urls responses. It allows parsing
|
||||
the initial responses and must return either an
|
||||
:class:`~scrapy.item.Item` object, a :class:`~scrapy.http.Request`
|
||||
:ref:`item object <topics-items>`, a :class:`~scrapy.http.Request`
|
||||
object, or an iterable containing any of them.
|
||||
|
||||
Crawling rules
|
||||
@ -385,7 +383,7 @@ Crawling rules
|
||||
object with that name will be used) to be called for each link extracted with
|
||||
the specified link extractor. This callback receives a :class:`~scrapy.http.Response`
|
||||
as its first argument and must return either a single instance or an iterable of
|
||||
:class:`~scrapy.item.Item`, ``dict`` and/or :class:`~scrapy.http.Request` objects
|
||||
:ref:`item objects <topics-items>` and/or :class:`~scrapy.http.Request` objects
|
||||
(or any subclass of them). As mentioned above, the received :class:`~scrapy.http.Response`
|
||||
object will contain the text of the link that produced the :class:`~scrapy.http.Request`
|
||||
in its ``meta`` dictionary (under the ``link_text`` key)
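A brief, illustrative sketch (the spider name and selectors are made up) showing a rule whose callback reads ``link_text`` from ``response.meta``::

    from scrapy.linkextractors import LinkExtractor
    from scrapy.spiders import CrawlSpider, Rule


    class QuotesCrawlSpider(CrawlSpider):
        name = 'quotes-crawl'
        start_urls = ['http://quotes.toscrape.com/']

        rules = (
            # Follow pagination links and hand each downloaded page to parse_page.
            Rule(LinkExtractor(restrict_css='li.next'), callback='parse_page', follow=True),
        )

        def parse_page(self, response):
            yield {
                'url': response.url,
                # Text of the link that produced this request.
                'link_text': response.meta.get('link_text'),
            }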
|
||||
@ -533,7 +531,7 @@ XMLFeedSpider
|
||||
(``itertag``). Receives the response and an
|
||||
:class:`~scrapy.selector.Selector` for each node. Overriding this
|
||||
method is mandatory. Otherwise, your spider won't work. This method
|
||||
must return either a :class:`~scrapy.item.Item` object, a
|
||||
must return an :ref:`item object <topics-items>`, a
|
||||
:class:`~scrapy.http.Request` object, or an iterable containing any of
|
||||
them.
|
||||
|
||||
@ -543,7 +541,7 @@ XMLFeedSpider
|
||||
spider, and it's intended to perform any last-minute processing required
|
||||
before returning the results to the framework core, for example setting the
|
||||
item IDs. It receives a list of results and the response which originated
|
||||
those results. It must return a list of results (Items or Requests).
|
||||
those results. It must return a list of results (items or requests).
|
||||
|
||||
|
||||
XMLFeedSpider example
|
||||
|
@ -89,13 +89,11 @@ convenience:
|
||||
+----------------+-------------------------------------------------------------------+
|
||||
| ``prefs`` | for memory debugging (see :ref:`topics-leaks`) |
|
||||
+----------------+-------------------------------------------------------------------+
|
||||
| ``p`` | a shortcut to the `pprint.pprint`_ function |
|
||||
| ``p`` | a shortcut to the :func:`pprint.pprint` function |
|
||||
+----------------+-------------------------------------------------------------------+
|
||||
| ``hpy`` | for memory debugging (see :ref:`topics-leaks`) |
|
||||
+----------------+-------------------------------------------------------------------+
|
||||
|
||||
.. _pprint.pprint: https://docs.python.org/library/pprint.html#pprint.pprint
|
||||
|
||||
Telnet console usage examples
|
||||
=============================
|
||||
|
||||
@ -208,4 +206,3 @@ Default: ``None``
|
||||
|
||||
The password used for the telnet console. The default behaviour is to have it
autogenerated.
|
||||
|
||||
|
@ -14,24 +14,27 @@ Author: dufferzafar
|
||||
|
||||
import re
|
||||
|
||||
# Used for remembering the file (and its contents)
|
||||
# so we don't have to open the same file again.
|
||||
_filename = None
|
||||
_contents = None
|
||||
|
||||
# A regex that matches standard linkcheck output lines
|
||||
line_re = re.compile(u'(.*)\:\d+\:\s\[(.*)\]\s(?:(.*)\sto\s(.*)|(.*))')
|
||||
def main():
|
||||
|
||||
# Read lines from the linkcheck output file
|
||||
try:
|
||||
# Used for remembering the file (and its contents)
|
||||
# so we don't have to open the same file again.
|
||||
_filename = None
|
||||
_contents = None
|
||||
|
||||
# A regex that matches standard linkcheck output lines
|
||||
line_re = re.compile(u'(.*)\:\d+\:\s\[(.*)\]\s(?:(.*)\sto\s(.*)|(.*))')
|
||||
|
||||
# Read lines from the linkcheck output file
|
||||
try:
|
||||
with open("build/linkcheck/output.txt") as out:
|
||||
output_lines = out.readlines()
|
||||
except IOError:
|
||||
except IOError:
|
||||
print("linkcheck output not found; please run linkcheck first.")
|
||||
exit(1)
|
||||
|
||||
# For every line, fix the respective file
|
||||
for line in output_lines:
|
||||
# For every line, fix the respective file
|
||||
for line in output_lines:
|
||||
match = re.match(line_re, line)
|
||||
|
||||
if match:
|
||||
@ -61,3 +64,7 @@ for line in output_lines:
|
||||
else:
|
||||
# We don't understand what the current line means!
|
||||
print("Not Understood: " + line)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
113
pylintrc
Normal file
113
pylintrc
Normal file
@ -0,0 +1,113 @@
|
||||
[MASTER]
|
||||
persistent=no
|
||||
jobs=1 # >1 hides results
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
disable=abstract-method,
|
||||
anomalous-backslash-in-string,
|
||||
arguments-differ,
|
||||
attribute-defined-outside-init,
|
||||
bad-classmethod-argument,
|
||||
bad-continuation,
|
||||
bad-indentation,
|
||||
bad-mcs-classmethod-argument,
|
||||
bad-super-call,
|
||||
bad-whitespace,
|
||||
bare-except,
|
||||
blacklisted-name,
|
||||
broad-except,
|
||||
c-extension-no-member,
|
||||
catching-non-exception,
|
||||
cell-var-from-loop,
|
||||
comparison-with-callable,
|
||||
consider-iterating-dictionary,
|
||||
consider-using-in,
|
||||
consider-using-set-comprehension,
|
||||
consider-using-sys-exit,
|
||||
cyclic-import,
|
||||
dangerous-default-value,
|
||||
deprecated-method,
|
||||
deprecated-module,
|
||||
duplicate-code, # https://github.com/PyCQA/pylint/issues/214
|
||||
eval-used,
|
||||
expression-not-assigned,
|
||||
fixme,
|
||||
function-redefined,
|
||||
global-statement,
|
||||
import-error,
|
||||
import-outside-toplevel,
|
||||
import-self,
|
||||
inconsistent-return-statements,
|
||||
inherit-non-class,
|
||||
invalid-name,
|
||||
invalid-overridden-method,
|
||||
isinstance-second-argument-not-valid-type,
|
||||
keyword-arg-before-vararg,
|
||||
line-too-long,
|
||||
logging-format-interpolation,
|
||||
logging-not-lazy,
|
||||
lost-exception,
|
||||
method-hidden,
|
||||
misplaced-comparison-constant,
|
||||
missing-docstring,
|
||||
missing-final-newline,
|
||||
multiple-imports,
|
||||
multiple-statements,
|
||||
no-else-continue,
|
||||
no-else-raise,
|
||||
no-else-return,
|
||||
no-init,
|
||||
no-member,
|
||||
no-method-argument,
|
||||
no-name-in-module,
|
||||
no-self-argument,
|
||||
no-self-use,
|
||||
no-value-for-parameter,
|
||||
not-an-iterable,
|
||||
not-callable,
|
||||
pointless-statement,
|
||||
pointless-string-statement,
|
||||
protected-access,
|
||||
redefined-argument-from-local,
|
||||
redefined-builtin,
|
||||
redefined-outer-name,
|
||||
reimported,
|
||||
signature-differs,
|
||||
singleton-comparison,
|
||||
super-init-not-called,
|
||||
superfluous-parens,
|
||||
too-few-public-methods,
|
||||
too-many-ancestors,
|
||||
too-many-arguments,
|
||||
too-many-branches,
|
||||
too-many-format-args,
|
||||
too-many-function-args,
|
||||
too-many-instance-attributes,
|
||||
too-many-lines,
|
||||
too-many-locals,
|
||||
too-many-public-methods,
|
||||
too-many-return-statements,
|
||||
trailing-newlines,
|
||||
trailing-whitespace,
|
||||
unbalanced-tuple-unpacking,
|
||||
undefined-variable,
|
||||
undefined-loop-variable,
|
||||
unexpected-special-method-signature,
|
||||
ungrouped-imports,
|
||||
unidiomatic-typecheck,
|
||||
unnecessary-comprehension,
|
||||
unnecessary-lambda,
|
||||
unnecessary-pass,
|
||||
unreachable,
|
||||
unsubscriptable-object,
|
||||
unused-argument,
|
||||
unused-import,
|
||||
unused-variable,
|
||||
unused-wildcard-import,
|
||||
used-before-assignment,
|
||||
useless-object-inheritance, # Required for Python 2 support
|
||||
useless-return,
|
||||
useless-super-delegation,
|
||||
wildcard-import,
|
||||
wrong-import-order,
|
||||
wrong-import-position
|
233
pytest.ini
233
pytest.ini
@ -20,232 +20,23 @@ addopts =
|
||||
twisted = 1
|
||||
markers =
|
||||
only_asyncio: marks tests as only enabled when --reactor=asyncio is passed
|
||||
flake8-max-line-length = 119
|
||||
flake8-ignore =
|
||||
W503
|
||||
# Files that are only meant to provide top-level imports are expected not
|
||||
# to use any of their imports:
|
||||
|
||||
# Exclude files that are meant to provide top-level imports
|
||||
# E402: Module level import not at top of file
|
||||
# F401: Module imported but unused
|
||||
scrapy/__init__.py E402
|
||||
scrapy/core/downloader/handlers/http.py F401
|
||||
scrapy/http/__init__.py F401
|
||||
scrapy/linkextractors/__init__.py E402 F401
|
||||
scrapy/selector/__init__.py F401
|
||||
scrapy/spiders/__init__.py E402 F401
|
||||
|
||||
# Issues pending a review:
|
||||
# extras
|
||||
extras/qps-bench-server.py E501
|
||||
extras/qpsclient.py E501 E501
|
||||
# scrapy/commands
|
||||
scrapy/commands/__init__.py E128 E501
|
||||
scrapy/commands/check.py E501
|
||||
scrapy/commands/crawl.py E501
|
||||
scrapy/commands/edit.py E501
|
||||
scrapy/commands/fetch.py E401 E501 E128 E731
|
||||
scrapy/commands/genspider.py E128 E501 E502
|
||||
scrapy/commands/parse.py E128 E501 E731
|
||||
scrapy/commands/runspider.py E501
|
||||
scrapy/commands/settings.py E128
|
||||
scrapy/commands/shell.py E128 E501 E502
|
||||
scrapy/commands/startproject.py E127 E501 E128
|
||||
scrapy/commands/version.py E501 E128
|
||||
# scrapy/contracts
|
||||
scrapy/contracts/__init__.py E501 W504
|
||||
scrapy/contracts/default.py E128
|
||||
# scrapy/core
|
||||
scrapy/core/engine.py E501 E128 E127 E502
|
||||
scrapy/core/scheduler.py E501
|
||||
scrapy/core/scraper.py E501 E128 W504
|
||||
scrapy/core/spidermw.py E501 E731 E126
|
||||
scrapy/core/downloader/__init__.py E501
|
||||
scrapy/core/downloader/contextfactory.py E501 E128 E126
|
||||
scrapy/core/downloader/middleware.py E501 E502
|
||||
scrapy/core/downloader/tls.py E501 E241
|
||||
scrapy/core/downloader/webclient.py E731 E501 E128 E126
|
||||
scrapy/core/downloader/handlers/__init__.py E501
|
||||
scrapy/core/downloader/handlers/ftp.py E501 E128 E127
|
||||
scrapy/core/downloader/handlers/http10.py E501
|
||||
scrapy/core/downloader/handlers/http11.py E501
|
||||
scrapy/core/downloader/handlers/s3.py E501 E128 E126
|
||||
# scrapy/downloadermiddlewares
|
||||
scrapy/downloadermiddlewares/ajaxcrawl.py E501
|
||||
scrapy/downloadermiddlewares/decompression.py E501
|
||||
scrapy/downloadermiddlewares/defaultheaders.py E501
|
||||
scrapy/downloadermiddlewares/httpcache.py E501 E126
|
||||
scrapy/downloadermiddlewares/httpcompression.py E501 E128
|
||||
scrapy/downloadermiddlewares/httpproxy.py E501
|
||||
scrapy/downloadermiddlewares/redirect.py E501 W504
|
||||
scrapy/downloadermiddlewares/retry.py E501 E126
|
||||
scrapy/downloadermiddlewares/robotstxt.py E501
|
||||
scrapy/downloadermiddlewares/stats.py E501
|
||||
# scrapy/extensions
|
||||
scrapy/extensions/closespider.py E501 E128 E123
|
||||
scrapy/extensions/corestats.py E501
|
||||
scrapy/extensions/feedexport.py E128 E501
|
||||
scrapy/extensions/httpcache.py E128 E501
|
||||
scrapy/extensions/memdebug.py E501
|
||||
scrapy/extensions/spiderstate.py E501
|
||||
scrapy/extensions/telnet.py E501 W504
|
||||
scrapy/extensions/throttle.py E501
|
||||
# scrapy/http
|
||||
scrapy/http/common.py E501
|
||||
scrapy/http/cookies.py E501
|
||||
scrapy/http/request/__init__.py E501
|
||||
scrapy/http/request/form.py E501 E123
|
||||
scrapy/http/request/json_request.py E501
|
||||
scrapy/http/response/__init__.py E501 E128
|
||||
scrapy/http/response/text.py E501 E128 E124
|
||||
# scrapy/linkextractors
|
||||
scrapy/linkextractors/__init__.py E731 E501 E402 W504
|
||||
scrapy/linkextractors/lxmlhtml.py E501 E731
|
||||
# scrapy/loader
|
||||
scrapy/loader/__init__.py E501 E128
|
||||
scrapy/loader/processors.py E501
|
||||
# scrapy/pipelines
|
||||
scrapy/pipelines/__init__.py E501
|
||||
scrapy/pipelines/files.py E116 E501 E266
|
||||
scrapy/pipelines/images.py E265 E501
|
||||
scrapy/pipelines/media.py E125 E501 E266
|
||||
# scrapy/selector
|
||||
scrapy/selector/__init__.py F403
|
||||
scrapy/selector/unified.py E501 E111
|
||||
# scrapy/settings
|
||||
scrapy/settings/__init__.py E501
|
||||
scrapy/settings/default_settings.py E501 E114 E116
|
||||
scrapy/settings/deprecated.py E501
|
||||
# scrapy/spidermiddlewares
|
||||
scrapy/spidermiddlewares/httperror.py E501
|
||||
scrapy/spidermiddlewares/offsite.py E501
|
||||
scrapy/spidermiddlewares/referer.py E501 E129 W504
|
||||
scrapy/spidermiddlewares/urllength.py E501
|
||||
# scrapy/spiders
|
||||
scrapy/spiders/__init__.py E501 E402
|
||||
scrapy/spiders/crawl.py E501
|
||||
scrapy/spiders/feed.py E501
|
||||
scrapy/spiders/sitemap.py E501
|
||||
# scrapy/utils
|
||||
scrapy/utils/asyncio.py E501
|
||||
scrapy/utils/benchserver.py E501
|
||||
scrapy/utils/conf.py E402 E501
|
||||
scrapy/utils/datatypes.py E501
|
||||
scrapy/utils/decorators.py E501
|
||||
scrapy/utils/defer.py E501 E128
|
||||
scrapy/utils/deprecate.py E128 E501 E127 E502
|
||||
scrapy/utils/gz.py E501 W504
|
||||
scrapy/utils/http.py F403
|
||||
scrapy/utils/httpobj.py E501
|
||||
scrapy/utils/iterators.py E501
|
||||
scrapy/utils/log.py E128 E501
|
||||
scrapy/utils/markup.py F403
|
||||
scrapy/utils/misc.py E501
|
||||
scrapy/utils/multipart.py F403
|
||||
scrapy/utils/project.py E501
|
||||
scrapy/utils/python.py E501
|
||||
scrapy/utils/reactor.py E501
|
||||
scrapy/utils/reqser.py E501
|
||||
scrapy/utils/request.py E127 E501
|
||||
scrapy/utils/response.py E501 E128
|
||||
scrapy/utils/signal.py E501 E128
|
||||
scrapy/utils/sitemap.py E501
|
||||
scrapy/utils/spider.py E501
|
||||
scrapy/utils/ssl.py E501
|
||||
scrapy/utils/test.py E501
|
||||
scrapy/utils/url.py E501 F403 E128 F405
|
||||
# scrapy
|
||||
scrapy/__init__.py E402 E501
|
||||
scrapy/cmdline.py E501
|
||||
scrapy/crawler.py E501
|
||||
scrapy/dupefilters.py E501 E202
|
||||
scrapy/exceptions.py E501
|
||||
scrapy/exporters.py E501
|
||||
scrapy/interfaces.py E501
|
||||
scrapy/item.py E501 E128
|
||||
scrapy/link.py E501
|
||||
scrapy/logformatter.py E501
|
||||
scrapy/mail.py E402 E128 E501 E502
|
||||
scrapy/middleware.py E128 E501
|
||||
scrapy/pqueues.py E501
|
||||
scrapy/resolver.py E501
|
||||
scrapy/responsetypes.py E128 E501
|
||||
scrapy/robotstxt.py E501
|
||||
scrapy/shell.py E501
|
||||
scrapy/signalmanager.py E501
|
||||
scrapy/spiderloader.py F841 E501 E126
|
||||
scrapy/squeues.py E128
|
||||
scrapy/statscollectors.py E501
|
||||
# tests
|
||||
tests/__init__.py E402 E501
|
||||
tests/mockserver.py E401 E501 E126 E123
|
||||
tests/pipelines.py F841
|
||||
tests/spiders.py E501 E127
|
||||
tests/test_closespider.py E501 E127
|
||||
tests/test_command_fetch.py E501
|
||||
tests/test_command_parse.py E501 E128
|
||||
tests/test_command_shell.py E501 E128
|
||||
tests/test_commands.py E128 E501
|
||||
tests/test_contracts.py E501 E128
|
||||
tests/test_crawl.py E501 E741 E265
|
||||
tests/test_crawler.py F841 E501
|
||||
tests/test_dependencies.py F841 E501
|
||||
tests/test_downloader_handlers.py E124 E127 E128 E265 E501 E126 E123
|
||||
tests/test_downloadermiddleware.py E501
|
||||
tests/test_downloadermiddleware_ajaxcrawlable.py E501
|
||||
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E265 E126
|
||||
tests/test_downloadermiddleware_decompression.py E127
|
||||
tests/test_downloadermiddleware_defaultheaders.py E501
|
||||
tests/test_downloadermiddleware_downloadtimeout.py E501
|
||||
tests/test_downloadermiddleware_httpcache.py E501
|
||||
tests/test_downloadermiddleware_httpcompression.py E501 E126 E123
|
||||
tests/test_downloadermiddleware_httpproxy.py E501 E128
|
||||
tests/test_downloadermiddleware_redirect.py E501 E128 E127
|
||||
tests/test_downloadermiddleware_retry.py E501 E128 E126
|
||||
tests/test_downloadermiddleware_robotstxt.py E501
|
||||
tests/test_downloadermiddleware_stats.py E501
|
||||
tests/test_dupefilters.py E501 E741 E128 E124
|
||||
tests/test_engine.py E401 E501 E128
|
||||
tests/test_exporters.py E501 E731 E128 E124
|
||||
tests/test_extension_telnet.py F841
|
||||
tests/test_feedexport.py E501 F841 E241
|
||||
tests/test_http_cookies.py E501
|
||||
tests/test_http_headers.py E501
|
||||
tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123
|
||||
tests/test_http_response.py E501 E128 E265
|
||||
tests/test_item.py E128 F841
|
||||
tests/test_link.py E501
|
||||
tests/test_linkextractors.py E501 E128 E124
|
||||
tests/test_loader.py E501 E731 E741 E128 E117 E241
|
||||
tests/test_logformatter.py E128 E501 E122
|
||||
tests/test_mail.py E128 E501
|
||||
tests/test_middleware.py E501 E128
|
||||
tests/test_pipeline_crawl.py E501 E128 E126
|
||||
tests/test_pipeline_files.py E501
|
||||
tests/test_pipeline_images.py F841 E501
|
||||
tests/test_pipeline_media.py E501 E741 E731 E128 E502
|
||||
tests/test_proxy_connect.py E501 E741
|
||||
tests/test_request_cb_kwargs.py E501
|
||||
tests/test_responsetypes.py E501
|
||||
tests/test_robotstxt_interface.py E501 E501
|
||||
tests/test_scheduler.py E501 E126 E123
|
||||
tests/test_selector.py E501 E127
|
||||
tests/test_spider.py E501
|
||||
tests/test_spidermiddleware.py E501
|
||||
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
|
||||
tests/test_spidermiddleware_offsite.py E501 E128 E111
|
||||
tests/test_spidermiddleware_output_chain.py E501
|
||||
tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
|
||||
tests/test_squeues.py E501 E741
|
||||
tests/test_utils_asyncio.py E501
|
||||
tests/test_utils_conf.py E501 E128
|
||||
tests/test_utils_curl.py E501
|
||||
tests/test_utils_datatypes.py E402 E501
|
||||
tests/test_utils_defer.py E501 F841
|
||||
tests/test_utils_deprecate.py F841 E501
|
||||
tests/test_utils_http.py E501 E128 W504
|
||||
tests/test_utils_iterators.py E501 E128 E129 E241
|
||||
tests/test_utils_log.py E741
|
||||
tests/test_utils_python.py E501 E731
|
||||
tests/test_utils_reqser.py E501 E128
|
||||
tests/test_utils_request.py E501 E128
|
||||
tests/test_utils_response.py E501
|
||||
tests/test_utils_signal.py E741 F841 E731
|
||||
tests/test_utils_sitemap.py E128 E501 E124
|
||||
tests/test_utils_url.py E501 E127 E125 E501 E241 E126 E123
|
||||
tests/test_webclient.py E501 E128 E122 E402 E241 E123 E126
|
||||
tests/test_cmdline/__init__.py E501
|
||||
tests/test_settings/__init__.py E501 E128
|
||||
tests/test_spiderloader/__init__.py E128 E501
|
||||
tests/test_utils_misc/__init__.py E501
|
||||
scrapy/utils/url.py F403 F405
|
||||
tests/test_loader.py E741
|
||||
|
@ -1 +1 @@
|
||||
2.0.0
|
||||
2.2.0
|
||||
|
@ -2,33 +2,11 @@
|
||||
Scrapy - a web crawling and web scraping framework written for Python
|
||||
"""
|
||||
|
||||
__all__ = ['__version__', 'version_info', 'twisted_version',
|
||||
'Spider', 'Request', 'FormRequest', 'Selector', 'Item', 'Field']
|
||||
|
||||
# Scrapy version
|
||||
import pkgutil
|
||||
__version__ = pkgutil.get_data(__package__, 'VERSION').decode('ascii').strip()
|
||||
version_info = tuple(int(v) if v.isdigit() else v
|
||||
for v in __version__.split('.'))
|
||||
del pkgutil
|
||||
|
||||
# Check minimum required Python version
|
||||
import sys
|
||||
if sys.version_info < (3, 5):
|
||||
print("Scrapy %s requires Python 3.5" % __version__)
|
||||
sys.exit(1)
|
||||
|
||||
# Ignore noisy twisted deprecation warnings
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore', category=DeprecationWarning, module='twisted')
|
||||
del warnings
|
||||
|
||||
# Apply monkey patches to fix issues in external libraries
|
||||
from scrapy import _monkeypatches
|
||||
del _monkeypatches
|
||||
|
||||
from twisted import version as _txv
|
||||
twisted_version = (_txv.major, _txv.minor, _txv.micro)
|
||||
|
||||
# Declare top-level shortcuts
|
||||
from scrapy.spiders import Spider
|
||||
@ -36,4 +14,29 @@ from scrapy.http import Request, FormRequest
|
||||
from scrapy.selector import Selector
|
||||
from scrapy.item import Item, Field
|
||||
|
||||
|
||||
__all__ = [
|
||||
'__version__', 'version_info', 'twisted_version', 'Spider',
|
||||
'Request', 'FormRequest', 'Selector', 'Item', 'Field',
|
||||
]
|
||||
|
||||
|
||||
# Scrapy and Twisted versions
|
||||
__version__ = pkgutil.get_data(__package__, 'VERSION').decode('ascii').strip()
|
||||
version_info = tuple(int(v) if v.isdigit() else v for v in __version__.split('.'))
|
||||
twisted_version = (_txv.major, _txv.minor, _txv.micro)
|
||||
|
||||
|
||||
# Check minimum required Python version
|
||||
if sys.version_info < (3, 5, 2):
|
||||
print("Scrapy %s requires Python 3.5.2" % __version__)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Ignore noisy twisted deprecation warnings
|
||||
warnings.filterwarnings('ignore', category=DeprecationWarning, module='twisted')
|
||||
|
||||
|
||||
del pkgutil
|
||||
del sys
|
||||
del warnings
|
||||
|
@ -1,11 +0,0 @@
|
||||
import copyreg
|
||||
|
||||
|
||||
# Undo what Twisted's perspective broker adds to pickle register
|
||||
# to prevent bugs like Twisted#7989 while serializing requests
|
||||
import twisted.persisted.styles # NOQA
|
||||
# Remove only entries with twisted serializers for non-twisted types.
|
||||
for k, v in frozenset(copyreg.dispatch_table.items()):
|
||||
if not str(getattr(k, '__module__', '')).startswith('twisted') \
|
||||
and str(getattr(v, '__module__', '')).startswith('twisted'):
|
||||
copyreg.dispatch_table.pop(k)
|
@ -165,6 +165,7 @@ if __name__ == '__main__':
|
||||
try:
|
||||
execute()
|
||||
finally:
|
||||
# Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect()
|
||||
# on exit: http://doc.pypy.org/en/latest/cpython_differences.html?highlight=gc.collect#differences-related-to-garbage-collection-strategies
|
||||
# Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit:
|
||||
# http://doc.pypy.org/en/latest/cpython_differences.html
|
||||
# ?highlight=gc.collect#differences-related-to-garbage-collection-strategies
|
||||
garbage_collect()
|
||||
|
@ -5,7 +5,7 @@ import os
|
||||
from optparse import OptionGroup
|
||||
from twisted.python import failure
|
||||
|
||||
from scrapy.utils.conf import arglist_to_dict
|
||||
from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
|
||||
from scrapy.exceptions import UsageError
|
||||
|
||||
|
||||
@ -23,7 +23,8 @@ class ScrapyCommand:
|
||||
self.settings = None # set in scrapy.cmdline
|
||||
|
||||
def set_crawler(self, crawler):
|
||||
assert not hasattr(self, '_crawler'), "crawler already set"
|
||||
if hasattr(self, '_crawler'):
|
||||
raise RuntimeError("crawler already set")
|
||||
self._crawler = crawler
|
||||
|
||||
def syntax(self):
|
||||
@ -103,3 +104,27 @@ class ScrapyCommand:
|
||||
Entry point for running commands
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class BaseRunSpiderCommand(ScrapyCommand):
|
||||
"""
|
||||
Common class used to share functionality between the crawl and runspider commands
|
||||
"""
|
||||
def add_options(self, parser):
|
||||
ScrapyCommand.add_options(self, parser)
|
||||
parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
|
||||
help="set spider argument (may be repeated)")
|
||||
parser.add_option("-o", "--output", metavar="FILE", action="append",
|
||||
help="dump scraped items into FILE (use - for stdout)")
|
||||
parser.add_option("-t", "--output-format", metavar="FORMAT",
|
||||
help="format to use for dumping items with -o")
|
||||
|
||||
def process_options(self, args, opts):
|
||||
ScrapyCommand.process_options(self, args, opts)
|
||||
try:
|
||||
opts.spargs = arglist_to_dict(opts.spargs)
|
||||
except ValueError:
|
||||
raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
|
||||
if opts.output:
|
||||
feeds = feed_process_params_from_cli(self.settings, opts.output, opts.output_format)
|
||||
self.settings.set('FEEDS', feeds, priority='cmdline')
|
||||
|
@ -1,9 +1,8 @@
|
||||
from scrapy.commands import ScrapyCommand
|
||||
from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
|
||||
from scrapy.commands import BaseRunSpiderCommand
|
||||
from scrapy.exceptions import UsageError
|
||||
|
||||
|
||||
class Command(ScrapyCommand):
|
||||
class Command(BaseRunSpiderCommand):
|
||||
|
||||
requires_project = True
|
||||
|
||||
@ -13,25 +12,6 @@ class Command(ScrapyCommand):
|
||||
def short_desc(self):
|
||||
return "Run a spider"
|
||||
|
||||
def add_options(self, parser):
|
||||
ScrapyCommand.add_options(self, parser)
|
||||
parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
|
||||
help="set spider argument (may be repeated)")
|
||||
parser.add_option("-o", "--output", metavar="FILE", action="append",
|
||||
help="dump scraped items into FILE (use - for stdout)")
|
||||
parser.add_option("-t", "--output-format", metavar="FORMAT",
|
||||
help="format to use for dumping items with -o")
|
||||
|
||||
def process_options(self, args, opts):
|
||||
ScrapyCommand.process_options(self, args, opts)
|
||||
try:
|
||||
opts.spargs = arglist_to_dict(opts.spargs)
|
||||
except ValueError:
|
||||
raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
|
||||
if opts.output:
|
||||
feeds = feed_process_params_from_cli(self.settings, opts.output, opts.output_format)
|
||||
self.settings.set('FEEDS', feeds, priority='cmdline')
|
||||
|
||||
def run(self, args, opts):
|
||||
if len(args) < 1:
|
||||
raise UsageError()
|
||||
|
@ -27,8 +27,8 @@ class Command(ScrapyCommand):
|
||||
parser.add_option("--spider", dest="spider", help="use this spider")
|
||||
parser.add_option("--headers", dest="headers", action="store_true",
|
||||
help="print response HTTP headers instead of body")
|
||||
parser.add_option("--no-redirect", dest="no_redirect", action="store_true",
|
||||
default=False, help="do not handle HTTP 3xx status codes and print response as-is")
|
||||
parser.add_option("--no-redirect", dest="no_redirect", action="store_true", default=False,
|
||||
help="do not handle HTTP 3xx status codes and print response as-is")
|
||||
|
||||
def _print_headers(self, headers, prefix):
|
||||
for key, values in headers.items():
|
||||
@ -49,8 +49,8 @@ class Command(ScrapyCommand):
|
||||
def run(self, args, opts):
|
||||
if len(args) != 1 or not is_url(args[0]):
|
||||
raise UsageError()
|
||||
cb = lambda x: self._print_response(x, opts)
|
||||
request = Request(args[0], callback=cb, dont_filter=True)
|
||||
request = Request(args[0], callback=self._print_response,
|
||||
cb_kwargs={"opts": opts}, dont_filter=True)
|
||||
# by default, let the framework handle redirects,
|
||||
# i.e. the command handles all codes except 3xx
|
||||
if not opts.no_redirect:
|
||||
|
@ -90,8 +90,7 @@ class Command(ScrapyCommand):
|
||||
'module': module,
|
||||
'name': name,
|
||||
'domain': domain,
|
||||
'classname': '%sSpider' % ''.join(s.capitalize() \
|
||||
for s in module.split('_'))
|
||||
'classname': '%sSpider' % ''.join(s.capitalize() for s in module.split('_'))
|
||||
}
|
||||
if self.settings.get('NEWSPIDER_MODULE'):
|
||||
spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
|
||||
@ -102,8 +101,8 @@ class Command(ScrapyCommand):
|
||||
spider_file = "%s.py" % join(spiders_dir, module)
|
||||
shutil.copyfile(template_file, spider_file)
|
||||
render_templatefile(spider_file, **tvars)
|
||||
print("Created spider %r using template %r " % (name, \
|
||||
template_name), end=('' if spiders_module else '\n'))
|
||||
print("Created spider %r using template %r "
|
||||
% (name, template_name), end=('' if spiders_module else '\n'))
|
||||
if spiders_module:
|
||||
print("in module:\n %s.%s" % (spiders_module.__name__, module))
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
import json
|
||||
import logging
|
||||
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
from w3lib.url import is_url
|
||||
|
||||
from scrapy.commands import ScrapyCommand
|
||||
from scrapy.http import Request
|
||||
from scrapy.item import BaseItem
|
||||
from scrapy.utils import display
|
||||
from scrapy.utils.conf import arglist_to_dict
|
||||
from scrapy.utils.spider import iterate_spider_output, spidercls_for_request
|
||||
@ -81,7 +81,7 @@ class Command(ScrapyCommand):
|
||||
items = self.items.get(lvl, [])
|
||||
|
||||
print("# Scraped Items ", "-" * 60)
|
||||
display.pprint([dict(x) for x in items], colorize=colour)
|
||||
display.pprint([ItemAdapter(x).asdict() for x in items], colorize=colour)
|
||||
|
||||
def print_requests(self, lvl=None, colour=True):
|
||||
if lvl is None:
|
||||
@ -117,7 +117,7 @@ class Command(ScrapyCommand):
|
||||
items, requests = [], []
|
||||
|
||||
for x in iterate_spider_output(callback(response, **cb_kwargs)):
|
||||
if isinstance(x, (BaseItem, dict)):
|
||||
if is_item(x):
|
||||
items.append(x)
|
||||
elif isinstance(x, Request):
|
||||
requests.append(x)
|
||||
@ -146,9 +146,8 @@ class Command(ScrapyCommand):
|
||||
if not self.spidercls:
|
||||
logger.error('Unable to find spider for: %(url)s', {'url': url})
|
||||
|
||||
# Request requires callback argument as callable or None, not string
|
||||
request = Request(url, None)
|
||||
_start_requests = lambda s: [self.prepare_request(s, request, opts)]
|
||||
def _start_requests(spider):
|
||||
yield self.prepare_request(spider, Request(url), opts)
|
||||
self.spidercls.start_requests = _start_requests
|
||||
|
||||
def start_parsing(self, url, opts):
|
||||
|
@ -3,9 +3,8 @@ import os
|
||||
from importlib import import_module
|
||||
|
||||
from scrapy.utils.spider import iter_spider_classes
|
||||
from scrapy.commands import ScrapyCommand
|
||||
from scrapy.exceptions import UsageError
|
||||
from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
|
||||
from scrapy.commands import BaseRunSpiderCommand
|
||||
|
||||
|
||||
def _import_file(filepath):
|
||||
@ -24,7 +23,7 @@ def _import_file(filepath):
|
||||
return module
|
||||
|
||||
|
||||
class Command(ScrapyCommand):
|
||||
class Command(BaseRunSpiderCommand):
|
||||
|
||||
requires_project = False
|
||||
default_settings = {'SPIDER_LOADER_WARN_ONLY': True}
|
||||
@ -38,25 +37,6 @@ class Command(ScrapyCommand):
|
||||
def long_desc(self):
|
||||
return "Run the spider defined in the given file"
|
||||
|
||||
def add_options(self, parser):
|
||||
ScrapyCommand.add_options(self, parser)
|
||||
parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
|
||||
help="set spider argument (may be repeated)")
|
||||
parser.add_option("-o", "--output", metavar="FILE", action="append",
|
||||
help="dump scraped items into FILE (use - for stdout)")
|
||||
parser.add_option("-t", "--output-format", metavar="FORMAT",
|
||||
help="format to use for dumping items with -o")
|
||||
|
||||
def process_options(self, args, opts):
|
||||
ScrapyCommand.process_options(self, args, opts)
|
||||
try:
|
||||
opts.spargs = arglist_to_dict(opts.spargs)
|
||||
except ValueError:
|
||||
raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
|
||||
if opts.output:
|
||||
feeds = feed_process_params_from_cli(self.settings, opts.output, opts.output_format)
|
||||
self.settings.set('FEEDS', feeds, priority='cmdline')
|
||||
|
||||
def run(self, args, opts):
|
||||
if len(args) != 1:
|
||||
raise UsageError()
|
||||
|
@ -37,8 +37,8 @@ class Command(ScrapyCommand):
|
||||
help="evaluate the code in the shell, print the result and exit")
|
||||
parser.add_option("--spider", dest="spider",
|
||||
help="use this spider")
|
||||
parser.add_option("--no-redirect", dest="no_redirect", action="store_true", \
|
||||
default=False, help="do not handle HTTP 3xx status codes and print response as-is")
|
||||
parser.add_option("--no-redirect", dest="no_redirect", action="store_true", default=False,
|
||||
help="do not handle HTTP 3xx status codes and print response as-is")
|
||||
|
||||
def update_vars(self, vars):
|
||||
"""You can use this function to update the Scrapy objects that will be
|
||||
|
@ -1,5 +1,6 @@
|
||||
import re
|
||||
import os
|
||||
import stat
|
||||
import string
|
||||
from importlib import import_module
|
||||
from os.path import join, exists, abspath
|
||||
@ -78,6 +79,29 @@ class Command(ScrapyCommand):
|
||||
else:
|
||||
copy2(srcname, dstname)
|
||||
copystat(src, dst)
|
||||
self._set_rw_permissions(dst)
|
||||
|
||||
def _set_rw_permissions(self, path):
|
||||
"""
|
||||
Sets permissions of files in a directory tree to +rw, and of folders to +rwx.
|
||||
This is necessary if the start template files come without write
|
||||
permissions.
|
||||
"""
|
||||
mode_rw = (stat.S_IRUSR
|
||||
| stat.S_IWUSR
|
||||
| stat.S_IRGRP
|
||||
| stat.S_IROTH)
|
||||
|
||||
mode_x = (stat.S_IXUSR
|
||||
| stat.S_IXGRP
|
||||
| stat.S_IXOTH)
|
||||
|
||||
os.chmod(path, mode_rw | mode_x)
|
||||
for root, dirs, files in os.walk(path):
|
||||
for dir in dirs:
|
||||
os.chmod(join(root, dir), mode_rw | mode_x)
|
||||
for file in files:
|
||||
os.chmod(join(root, file), mode_rw)
|
||||
|
||||
def run(self, args, opts):
|
||||
if len(args) not in (1, 2):
|
||||
@ -102,10 +126,8 @@ class Command(ScrapyCommand):
|
||||
move(join(project_dir, 'module'), join(project_dir, project_name))
|
||||
for paths in TEMPLATES_TO_RENDER:
|
||||
path = join(*paths)
|
||||
tplfile = join(project_dir,
|
||||
string.Template(path).substitute(project_name=project_name))
|
||||
render_templatefile(tplfile, project_name=project_name,
|
||||
ProjectName=string_camelcase(project_name))
|
||||
tplfile = join(project_dir, string.Template(path).substitute(project_name=project_name))
|
||||
render_templatefile(tplfile, project_name=project_name, ProjectName=string_camelcase(project_name))
|
||||
print("New Scrapy project '%s', using template directory '%s', "
|
||||
"created in:" % (project_name, self.templates_dir))
|
||||
print(" %s\n" % abspath(project_dir))
|
||||
|
@ -17,10 +17,10 @@ class ContractsManager:
|
||||
self.contracts[contract.name] = contract
|
||||
|
||||
def tested_methods_from_spidercls(self, spidercls):
|
||||
is_method = re.compile(r"^\s*@", re.MULTILINE).search
|
||||
methods = []
|
||||
for key, value in getmembers(spidercls):
|
||||
if (callable(value) and value.__doc__ and
|
||||
re.search(r'^\s*@', value.__doc__, re.MULTILINE)):
|
||||
if callable(value) and value.__doc__ and is_method(value.__doc__):
|
||||
methods.append(key)
|
||||
|
||||
return methods
|
||||
|
@ -1,10 +1,10 @@
|
||||
import json
|
||||
|
||||
from scrapy.item import BaseItem
|
||||
from scrapy.http import Request
|
||||
from scrapy.exceptions import ContractFail
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
from scrapy.contracts import Contract
|
||||
from scrapy.exceptions import ContractFail
|
||||
from scrapy.http import Request
|
||||
|
||||
|
||||
# contracts
|
||||
@ -48,19 +48,23 @@ class ReturnsContract(Contract):
|
||||
"""
|
||||
|
||||
name = 'returns'
|
||||
objects = {
|
||||
'request': Request,
|
||||
'requests': Request,
|
||||
'item': (BaseItem, dict),
|
||||
'items': (BaseItem, dict),
|
||||
object_type_verifiers = {
|
||||
'request': lambda x: isinstance(x, Request),
|
||||
'requests': lambda x: isinstance(x, Request),
|
||||
'item': is_item,
|
||||
'items': is_item,
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(ReturnsContract, self).__init__(*args, **kwargs)
|
||||
|
||||
assert len(self.args) in [1, 2, 3]
|
||||
if len(self.args) not in [1, 2, 3]:
|
||||
raise ValueError(
|
||||
"Incorrect argument quantity: expected 1, 2 or 3, got %i"
|
||||
% len(self.args)
|
||||
)
|
||||
self.obj_name = self.args[0] or None
|
||||
self.obj_type = self.objects[self.obj_name]
|
||||
self.obj_type_verifier = self.object_type_verifiers[self.obj_name]
|
||||
|
||||
try:
|
||||
self.min_bound = int(self.args[1])
|
||||
@ -75,7 +79,7 @@ class ReturnsContract(Contract):
|
||||
def post_process(self, output):
|
||||
occurrences = 0
|
||||
for x in output:
|
||||
if isinstance(x, self.obj_type):
|
||||
if self.obj_type_verifier(x):
|
||||
occurrences += 1
|
||||
|
||||
assertion = (self.min_bound <= occurrences <= self.max_bound)
|
||||
@ -99,8 +103,8 @@ class ScrapesContract(Contract):
|
||||
|
||||
def post_process(self, output):
|
||||
for x in output:
|
||||
if isinstance(x, (BaseItem, dict)):
|
||||
missing = [arg for arg in self.args if arg not in x]
|
||||
if is_item(x):
|
||||
missing = [arg for arg in self.args if arg not in ItemAdapter(x)]
|
||||
if missing:
|
||||
raise ContractFail(
|
||||
"Missing fields: %s" % ", ".join(missing))
|
||||
missing_str = ", ".join(missing)
|
||||
raise ContractFail("Missing fields: %s" % missing_str)
|
||||
|
@ -46,11 +46,12 @@ class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
|
||||
#
|
||||
# * getattr() for `_ssl_method` attribute for context factories
|
||||
# not calling super(..., self).__init__
|
||||
return CertificateOptions(verify=False,
|
||||
method=getattr(self, 'method',
|
||||
getattr(self, '_ssl_method', None)),
|
||||
return CertificateOptions(
|
||||
verify=False,
|
||||
method=getattr(self, 'method', getattr(self, '_ssl_method', None)),
|
||||
fixBrokenPeers=True,
|
||||
acceptableCiphers=self.tls_ciphers)
|
||||
acceptableCiphers=self.tls_ciphers,
|
||||
)
|
||||
|
||||
# kept for old-style HTTP/1.0 downloader context twisted calls,
|
||||
# e.g. connectSSL()
|
||||
@ -86,8 +87,8 @@ class BrowserLikeContextFactory(ScrapyClientContextFactory):
|
||||
#
|
||||
# This means that a website like https://www.cacert.org will be rejected
|
||||
# by default, since CAcert.org CA certificate is seldom shipped.
|
||||
return optionsForClientTLS(hostname.decode("ascii"),
|
||||
return optionsForClientTLS(
|
||||
hostname=hostname.decode("ascii"),
|
||||
trustRoot=platformTrust(),
|
||||
extraCertificateOptions={
|
||||
'method': self._ssl_method,
|
||||
})
|
||||
extraCertificateOptions={'method': self._ssl_method},
|
||||
)
|
||||
|
@ -86,19 +86,19 @@ class FTPDownloadHandler:
|
||||
password = request.meta.get("ftp_password", self.default_password)
|
||||
passive_mode = 1 if bool(request.meta.get("ftp_passive",
|
||||
self.passive_mode)) else 0
|
||||
creator = ClientCreator(reactor, FTPClient, user, password,
|
||||
passive=passive_mode)
|
||||
return creator.connectTCP(parsed_url.hostname, parsed_url.port or 21).addCallback(self.gotClient,
|
||||
request, unquote(parsed_url.path))
|
||||
creator = ClientCreator(reactor, FTPClient, user, password, passive=passive_mode)
|
||||
dfd = creator.connectTCP(parsed_url.hostname, parsed_url.port or 21)
|
||||
return dfd.addCallback(self.gotClient, request, unquote(parsed_url.path))
|
||||
|
||||
def gotClient(self, client, request, filepath):
|
||||
self.client = client
|
||||
protocol = ReceivedDataProtocol(request.meta.get("ftp_local_filename"))
|
||||
return client.retrieveFile(filepath, protocol)\
|
||||
.addCallbacks(callback=self._build_response,
|
||||
return client.retrieveFile(filepath, protocol).addCallbacks(
|
||||
callback=self._build_response,
|
||||
callbackArgs=(request, protocol),
|
||||
errback=self._failed,
|
||||
errbackArgs=(request,))
|
||||
errbackArgs=(request,),
|
||||
)
|
||||
|
||||
def _build_response(self, result, request, protocol):
|
||||
self.result = result
|
||||
|
@ -1,5 +1,6 @@
|
||||
"""Download handlers for http and https schemes"""
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import re
|
||||
import warnings
|
||||
@ -11,15 +12,17 @@ from urllib.parse import urldefrag
|
||||
from twisted.internet import defer, protocol, ssl
|
||||
from twisted.internet.endpoints import TCP4ClientEndpoint
|
||||
from twisted.internet.error import TimeoutError
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.web.client import Agent, HTTPConnectionPool, ResponseDone, ResponseFailed, URI
|
||||
from twisted.web.http import _DataLoss, PotentialDataLoss
|
||||
from twisted.web.http_headers import Headers as TxHeaders
|
||||
from twisted.web.iweb import IBodyProducer, UNKNOWN_LENGTH
|
||||
from zope.interface import implementer
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy.core.downloader.tls import openssl_methods
|
||||
from scrapy.core.downloader.webclient import _parse
|
||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||
from scrapy.exceptions import ScrapyDeprecationWarning, StopDownload
|
||||
from scrapy.http import Headers
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.utils.misc import create_instance, load_object
|
||||
@ -33,6 +36,8 @@ class HTTP11DownloadHandler:
|
||||
lazy = False
|
||||
|
||||
def __init__(self, settings, crawler=None):
|
||||
self._crawler = crawler
|
||||
|
||||
from twisted.internet import reactor
|
||||
self._pool = HTTPConnectionPool(reactor, persistent=True)
|
||||
self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
|
||||
@ -78,6 +83,7 @@ class HTTP11DownloadHandler:
|
||||
maxsize=getattr(spider, 'download_maxsize', self._default_maxsize),
|
||||
warnsize=getattr(spider, 'download_warnsize', self._default_warnsize),
|
||||
fail_on_dataloss=self._fail_on_dataloss,
|
||||
crawler=self._crawler,
|
||||
)
|
||||
return agent.download_request(request)
|
||||
|
||||
@ -275,7 +281,7 @@ class ScrapyAgent:
|
||||
_TunnelingAgent = TunnelingAgent
|
||||
|
||||
def __init__(self, contextFactory=None, connectTimeout=10, bindAddress=None, pool=None,
|
||||
maxsize=0, warnsize=0, fail_on_dataloss=True):
|
||||
maxsize=0, warnsize=0, fail_on_dataloss=True, crawler=None):
|
||||
self._contextFactory = contextFactory
|
||||
self._connectTimeout = connectTimeout
|
||||
self._bindAddress = bindAddress
|
||||
@ -284,6 +290,7 @@ class ScrapyAgent:
|
||||
self._warnsize = warnsize
|
||||
self._fail_on_dataloss = fail_on_dataloss
|
||||
self._txresponse = None
|
||||
self._crawler = crawler
|
||||
|
||||
def _get_agent(self, request, timeout):
|
||||
from twisted.internet import reactor
|
||||
@ -341,20 +348,6 @@ class ScrapyAgent:
|
||||
headers.removeHeader(b'Proxy-Authorization')
|
||||
if request.body:
|
||||
bodyproducer = _RequestBodyProducer(request.body)
|
||||
elif method == b'POST':
|
||||
# Setting Content-Length: 0 even for POST requests is not a
|
||||
# MUST per HTTP RFCs, but it's common behavior, and some
|
||||
# servers require this, otherwise returning HTTP 411 Length required
|
||||
#
|
||||
# RFC 7230#section-3.3.2:
|
||||
# "a Content-Length header field is normally sent in a POST
|
||||
# request even when the value is 0 (indicating an empty payload body)."
|
||||
#
|
||||
# Twisted < 17 will not add "Content-Length: 0" by itself;
|
||||
# Twisted >= 17 fixes this;
|
||||
# Using a producer with an empty-string sends `0` as Content-Length
|
||||
# for all versions of Twisted.
|
||||
bodyproducer = _RequestBodyProducer(b'')
|
||||
else:
|
||||
bodyproducer = None
|
||||
start_time = time()
|
||||
@ -387,7 +380,13 @@ class ScrapyAgent:
|
||||
def _cb_bodyready(self, txresponse, request):
|
||||
# deliverBody hangs for responses without body
|
||||
if txresponse.length == 0:
|
||||
return txresponse, b'', None, None
|
||||
return {
|
||||
"txresponse": txresponse,
|
||||
"body": b"",
|
||||
"flags": None,
|
||||
"certificate": None,
|
||||
"ip_address": None,
|
||||
}
|
||||
|
||||
maxsize = request.meta.get('download_maxsize', self._maxsize)
|
||||
warnsize = request.meta.get('download_warnsize', self._warnsize)
|
||||
@ -414,7 +413,15 @@ class ScrapyAgent:
|
||||
|
||||
d = defer.Deferred(_cancel)
|
||||
txresponse.deliverBody(
|
||||
_ResponseReader(d, txresponse, request, maxsize, warnsize, fail_on_dataloss)
|
||||
_ResponseReader(
|
||||
finished=d,
|
||||
txresponse=txresponse,
|
||||
request=request,
|
||||
maxsize=maxsize,
|
||||
warnsize=warnsize,
|
||||
fail_on_dataloss=fail_on_dataloss,
|
||||
crawler=self._crawler,
|
||||
)
|
||||
)
|
||||
|
||||
# save response for timeouts
|
||||
@ -423,12 +430,21 @@ class ScrapyAgent:
|
||||
return d
|
||||
|
||||
def _cb_bodydone(self, result, request, url):
|
||||
txresponse, body, flags, certificate = result
|
||||
status = int(txresponse.code)
|
||||
headers = Headers(txresponse.headers.getAllRawHeaders())
|
||||
respcls = responsetypes.from_args(headers=headers, url=url, body=body)
|
||||
return respcls(url=url, status=status, headers=headers, body=body,
|
||||
flags=flags, certificate=certificate)
|
||||
headers = Headers(result["txresponse"].headers.getAllRawHeaders())
|
||||
respcls = responsetypes.from_args(headers=headers, url=url, body=result["body"])
|
||||
response = respcls(
|
||||
url=url,
|
||||
status=int(result["txresponse"].code),
|
||||
headers=headers,
|
||||
body=result["body"],
|
||||
flags=result["flags"],
|
||||
certificate=result["certificate"],
|
||||
ip_address=result["ip_address"],
|
||||
)
|
||||
if result.get("failure"):
|
||||
result["failure"].value.response = response
|
||||
return result["failure"]
|
||||
return response
|
||||
|
||||
|
||||
@implementer(IBodyProducer)
|
||||
@ -451,7 +467,7 @@ class _RequestBodyProducer:
|
||||
|
||||
class _ResponseReader(protocol.Protocol):
|
||||
|
||||
def __init__(self, finished, txresponse, request, maxsize, warnsize, fail_on_dataloss):
|
||||
def __init__(self, finished, txresponse, request, maxsize, warnsize, fail_on_dataloss, crawler):
|
||||
self._finished = finished
|
||||
self._txresponse = txresponse
|
||||
self._request = request
|
||||
@ -463,12 +479,27 @@ class _ResponseReader(protocol.Protocol):
|
||||
self._reached_warnsize = False
|
||||
self._bytes_received = 0
|
||||
self._certificate = None
|
||||
self._ip_address = None
|
||||
self._crawler = crawler
|
||||
|
||||
def _finish_response(self, flags=None, failure=None):
|
||||
self._finished.callback({
|
||||
"txresponse": self._txresponse,
|
||||
"body": self._bodybuf.getvalue(),
|
||||
"flags": flags,
|
||||
"certificate": self._certificate,
|
||||
"ip_address": self._ip_address,
|
||||
"failure": failure,
|
||||
})
|
||||
|
||||
def connectionMade(self):
|
||||
if self._certificate is None:
|
||||
with suppress(AttributeError):
|
||||
self._certificate = ssl.Certificate(self.transport._producer.getPeerCertificate())
|
||||
|
||||
if self._ip_address is None:
|
||||
self._ip_address = ipaddress.ip_address(self.transport._producer.getPeer().host)
|
||||
|
||||
def dataReceived(self, bodyBytes):
|
||||
# This may be called several times after cancel was called with buffered data.
|
||||
if self._finished.called:
|
||||
@ -477,6 +508,20 @@ class _ResponseReader(protocol.Protocol):
|
||||
self._bodybuf.write(bodyBytes)
self._bytes_received += len(bodyBytes)

bytes_received_result = self._crawler.signals.send_catch_log(
    signal=signals.bytes_received,
    data=bodyBytes,
    request=self._request,
    spider=self._crawler.spider,
)
for handler, result in bytes_received_result:
    if isinstance(result, Failure) and isinstance(result.value, StopDownload):
        logger.debug("Download stopped for %(request)s from signal handler %(handler)s",
                     {"request": self._request, "handler": handler.__qualname__})
        self.transport._producer.loseConnection()
        failure = result if result.value.fail else None
        self._finish_response(flags=["download_stopped"], failure=failure)

if self._maxsize and self._bytes_received > self._maxsize:
|
||||
logger.error("Received (%(bytes)s) bytes larger than download "
|
||||
"max size (%(maxsize)s) in request %(request)s.",
|
||||
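The signal dispatch added above is what makes per-chunk handlers possible. A hedged end-to-end sketch, assuming the documented bytes_received signal and StopDownload exception from this release:

    from scrapy import Spider, signals
    from scrapy.exceptions import StopDownload

    class HeadOnlySpider(Spider):
        name = "head_only"
        start_urls = ["https://example.com"]

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            crawler.signals.connect(spider.on_bytes_received, signal=signals.bytes_received)
            return spider

        def on_bytes_received(self, data, request, spider):
            # Stop after the first chunk; fail=False routes the partial response
            # to the regular callback with a "download_stopped" flag.
            raise StopDownload(fail=False)

        def parse(self, response):
            self.logger.info("Kept %d bytes of %s", len(response.body), response.url)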
@ -498,18 +543,17 @@ class _ResponseReader(protocol.Protocol):
|
||||
if self._finished.called:
|
||||
return
|
||||
|
||||
body = self._bodybuf.getvalue()
|
||||
if reason.check(ResponseDone):
|
||||
self._finished.callback((self._txresponse, body, None, self._certificate))
|
||||
self._finish_response()
|
||||
return
|
||||
|
||||
if reason.check(PotentialDataLoss):
|
||||
self._finished.callback((self._txresponse, body, ['partial'], self._certificate))
|
||||
self._finish_response(flags=["partial"])
|
||||
return
|
||||
|
||||
if reason.check(ResponseFailed) and any(r.check(_DataLoss) for r in reason.value.reasons):
|
||||
if not self._fail_on_dataloss:
|
||||
self._finished.callback((self._txresponse, body, ['dataloss'], self._certificate))
|
||||
self._finish_response(flags=["dataloss"])
|
||||
return
|
||||
|
||||
elif not self._fail_on_dataloss_warned:
|
||||
|
@ -105,6 +105,7 @@ class S3DownloadHandler:
|
||||
key=unquote(p.path),
|
||||
query_args=unquote(p.query),
|
||||
headers=request.headers,
|
||||
data=request.body)
|
||||
data=request.body,
|
||||
)
|
||||
request = request.replace(url=url, headers=signed_headers)
|
||||
return self._download_http(request, spider)
|
||||
|
@ -35,38 +35,45 @@ class DownloaderMiddlewareManager(MiddlewareManager):
|
||||
for method in self.methods['process_request']:
|
||||
response = yield deferred_from_coro(method(request=request, spider=spider))
|
||||
if response is not None and not isinstance(response, (Response, Request)):
|
||||
raise _InvalidOutput('Middleware %s.process_request must return None, Response or Request, got %s' % \
|
||||
(method.__self__.__class__.__name__, response.__class__.__name__))
|
||||
raise _InvalidOutput(
|
||||
"Middleware %s.process_request must return None, Response or Request, got %s"
|
||||
% (method.__self__.__class__.__name__, response.__class__.__name__)
|
||||
)
|
||||
if response:
|
||||
defer.returnValue(response)
|
||||
defer.returnValue((yield download_func(request=request, spider=spider)))
|
||||
return response
|
||||
return (yield download_func(request=request, spider=spider))
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def process_response(response):
|
||||
assert response is not None, 'Received None in process_response'
|
||||
if isinstance(response, Request):
|
||||
defer.returnValue(response)
|
||||
if response is None:
|
||||
raise TypeError("Received None in process_response")
|
||||
elif isinstance(response, Request):
|
||||
return response
|
||||
|
||||
for method in self.methods['process_response']:
|
||||
response = yield deferred_from_coro(method(request=request, response=response, spider=spider))
|
||||
if not isinstance(response, (Response, Request)):
|
||||
raise _InvalidOutput('Middleware %s.process_response must return Response or Request, got %s' % \
|
||||
(method.__self__.__class__.__name__, type(response)))
|
||||
raise _InvalidOutput(
|
||||
"Middleware %s.process_response must return Response or Request, got %s"
|
||||
% (method.__self__.__class__.__name__, type(response))
|
||||
)
|
||||
if isinstance(response, Request):
|
||||
defer.returnValue(response)
|
||||
defer.returnValue(response)
|
||||
return response
|
||||
return response
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def process_exception(_failure):
|
||||
exception = _failure.value
|
||||
def process_exception(failure):
|
||||
exception = failure.value
|
||||
for method in self.methods['process_exception']:
|
||||
response = yield deferred_from_coro(method(request=request, exception=exception, spider=spider))
|
||||
if response is not None and not isinstance(response, (Response, Request)):
|
||||
raise _InvalidOutput('Middleware %s.process_exception must return None, Response or Request, got %s' % \
|
||||
(method.__self__.__class__.__name__, type(response)))
|
||||
raise _InvalidOutput(
|
||||
"Middleware %s.process_exception must return None, Response or Request, got %s"
|
||||
% (method.__self__.__class__.__name__, type(response))
|
||||
)
|
||||
if response:
|
||||
defer.returnValue(response)
|
||||
defer.returnValue(_failure)
|
||||
return response
|
||||
return failure
|
||||
|
||||
deferred = mustbe_deferred(process_request, request)
|
||||
deferred.addErrback(process_exception)
|
||||
|
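A minimal downloader-middleware sketch illustrating the contract the checks above enforce: process_request may return None, a Response or a Request, and process_response must return a Response or a Request (the module path in the comment is hypothetical):

    from scrapy.http import Response

    class ShortCircuitMiddleware:
        # enable via DOWNLOADER_MIDDLEWARES = {"myproject.middlewares.ShortCircuitMiddleware": 100}

        def process_request(self, request, spider):
            if request.meta.get("skip_download"):
                # Returning a Response short-circuits the actual download.
                return Response(url=request.url, status=200, body=b"")
            return None  # continue with the remaining middlewares and the download

        def process_response(self, request, response, spider):
            return response  # never None, otherwise _InvalidOutput is raised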
@ -14,13 +14,12 @@ from scrapy.responsetypes import responsetypes
|
||||
def _parsed_url_args(parsed):
|
||||
# Assume parsed is urlparse-d from Request.url,
|
||||
# which was passed via safe_url_string and is ascii-only.
|
||||
b = lambda s: to_bytes(s, encoding='ascii')
|
||||
path = urlunparse(('', '', parsed.path or '/', parsed.params, parsed.query, ''))
|
||||
path = b(path)
|
||||
host = b(parsed.hostname)
|
||||
path = to_bytes(path, encoding="ascii")
|
||||
host = to_bytes(parsed.hostname, encoding="ascii")
|
||||
port = parsed.port
|
||||
scheme = b(parsed.scheme)
|
||||
netloc = b(parsed.netloc)
|
||||
scheme = to_bytes(parsed.scheme, encoding="ascii")
|
||||
netloc = to_bytes(parsed.netloc, encoding="ascii")
|
||||
if port is None:
|
||||
port = 443 if scheme == b'https' else 80
|
||||
return scheme, netloc, host, port, path
|
||||
@ -89,8 +88,8 @@ class ScrapyHTTPPageGetter(HTTPClient):
|
||||
self.transport.stopProducing()
|
||||
|
||||
self.factory.noPage(
|
||||
defer.TimeoutError("Getting %s took longer than %s seconds." %
|
||||
(self.factory.url, self.factory.timeout)))
|
||||
defer.TimeoutError("Getting %s took longer than %s seconds."
|
||||
% (self.factory.url, self.factory.timeout)))
|
||||
|
||||
|
||||
class ScrapyHTTPClientFactory(HTTPClientFactory):
|
||||
|
@ -73,7 +73,8 @@ class ExecutionEngine:
|
||||
@defer.inlineCallbacks
|
||||
def start(self):
|
||||
"""Start the execution engine"""
|
||||
assert not self.running, "Engine already running"
|
||||
if self.running:
|
||||
raise RuntimeError("Engine already running")
|
||||
self.start_time = time()
|
||||
yield self.signals.send_catch_log_deferred(signal=signals.engine_started)
|
||||
self.running = True
|
||||
@ -82,7 +83,8 @@ class ExecutionEngine:
|
||||
|
||||
def stop(self):
|
||||
"""Stop the execution engine gracefully"""
|
||||
assert self.running, "Engine not running"
|
||||
if not self.running:
|
||||
raise RuntimeError("Engine not running")
|
||||
self.running = False
|
||||
dfd = self._close_all_spiders()
|
||||
return dfd.addBoth(lambda _: self._finish_stopping_engine())
|
||||
@ -165,7 +167,11 @@ class ExecutionEngine:
|
||||
return d
|
||||
|
||||
def _handle_downloader_output(self, response, request, spider):
|
||||
assert isinstance(response, (Request, Response, Failure)), response
|
||||
if not isinstance(response, (Request, Response, Failure)):
|
||||
raise TypeError(
|
||||
"Incorrect type: expected Request, Response or Failure, got %s: %r"
|
||||
% (type(response), response)
|
||||
)
|
||||
# downloader middleware can return requests (for example, redirects)
|
||||
if isinstance(response, Request):
|
||||
self.crawl(response, spider)
|
||||
@ -205,17 +211,15 @@ class ExecutionEngine:
|
||||
return not bool(self.slot)
|
||||
|
||||
def crawl(self, request, spider):
|
||||
assert spider in self.open_spiders, \
|
||||
"Spider %r not opened when crawling: %s" % (spider.name, request)
|
||||
if spider not in self.open_spiders:
|
||||
raise RuntimeError("Spider %r not opened when crawling: %s" % (spider.name, request))
|
||||
self.schedule(request, spider)
|
||||
self.slot.nextcall.schedule()
|
||||
|
||||
def schedule(self, request, spider):
|
||||
self.signals.send_catch_log(signal=signals.request_scheduled,
|
||||
request=request, spider=spider)
|
||||
self.signals.send_catch_log(signals.request_scheduled, request=request, spider=spider)
|
||||
if not self.slot.scheduler.enqueue_request(request):
|
||||
self.signals.send_catch_log(signal=signals.request_dropped,
|
||||
request=request, spider=spider)
|
||||
self.signals.send_catch_log(signals.request_dropped, request=request, spider=spider)
|
||||
|
||||
def download(self, request, spider):
|
||||
d = self._download(request, spider)
|
||||
@ -224,21 +228,24 @@ class ExecutionEngine:
|
||||
|
||||
def _downloaded(self, response, slot, request, spider):
|
||||
slot.remove_request(request)
|
||||
return self.download(response, spider) \
|
||||
if isinstance(response, Request) else response
|
||||
return self.download(response, spider) if isinstance(response, Request) else response
|
||||
|
||||
def _download(self, request, spider):
|
||||
slot = self.slot
|
||||
slot.add_request(request)
|
||||
|
||||
def _on_success(response):
|
||||
assert isinstance(response, (Response, Request))
|
||||
if not isinstance(response, (Response, Request)):
|
||||
raise TypeError(
|
||||
"Incorrect type: expected Response or Request, got %s: %r"
|
||||
% (type(response), response)
|
||||
)
|
||||
if isinstance(response, Response):
|
||||
response.request = request # tie request to response received
|
||||
logkws = self.logformatter.crawled(request, response, spider)
|
||||
if logkws is not None:
|
||||
logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
|
||||
self.signals.send_catch_log(signal=signals.response_received,
|
||||
self.signals.send_catch_log(signals.response_received,
|
||||
response=response, request=request, spider=spider)
|
||||
return response
|
||||
|
||||
@ -253,8 +260,8 @@ class ExecutionEngine:
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def open_spider(self, spider, start_requests=(), close_if_idle=True):
|
||||
assert self.has_capacity(), "No free spider slot when opening %r" % \
|
||||
spider.name
|
||||
if not self.has_capacity():
|
||||
raise RuntimeError("No free spider slot when opening %r" % spider.name)
|
||||
logger.info("Spider opened", extra={'spider': spider})
|
||||
nextcall = CallLaterOnce(self._next_request, spider)
|
||||
scheduler = self.scheduler_cls.from_crawler(self.crawler)
|
||||
@ -277,10 +284,8 @@ class ExecutionEngine:
|
||||
next loop and this function is guaranteed to be called (at least) once
|
||||
again for this spider.
|
||||
"""
|
||||
res = self.signals.send_catch_log(signal=signals.spider_idle, \
|
||||
spider=spider, dont_log=DontCloseSpider)
|
||||
if any(isinstance(x, Failure) and isinstance(x.value, DontCloseSpider) \
|
||||
for _, x in res):
|
||||
res = self.signals.send_catch_log(signals.spider_idle, spider=spider, dont_log=DontCloseSpider)
|
||||
if any(isinstance(x, Failure) and isinstance(x.value, DontCloseSpider) for _, x in res):
|
||||
return
|
||||
|
||||
if self.spider_is_idle(spider):
|
||||
|
@ -4,18 +4,18 @@ extracts information from them"""
|
||||
import logging
|
||||
from collections import deque
|
||||
|
||||
from twisted.python.failure import Failure
|
||||
from itemadapter import is_item
|
||||
from twisted.internet import defer
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy.utils.defer import defer_result, defer_succeed, parallel, iter_errback
|
||||
from scrapy.utils.spider import iterate_spider_output
|
||||
from scrapy.utils.misc import load_object, warn_on_generator_with_return_value
|
||||
from scrapy.utils.log import logformatter_adapter, failure_to_exc_info
|
||||
from scrapy.exceptions import CloseSpider, DropItem, IgnoreRequest
|
||||
from scrapy import signals
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.item import BaseItem
|
||||
from scrapy.core.spidermw import SpiderMiddlewareManager
|
||||
from scrapy.exceptions import CloseSpider, DropItem, IgnoreRequest
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.utils.defer import defer_result, defer_succeed, iter_errback, parallel
|
||||
from scrapy.utils.log import failure_to_exc_info, logformatter_adapter
|
||||
from scrapy.utils.misc import load_object, warn_on_generator_with_return_value
|
||||
from scrapy.utils.spider import iterate_spider_output
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -123,7 +123,11 @@ class Scraper:
|
||||
def _scrape(self, response, request, spider):
|
||||
"""Handle the downloaded response or failure through the spider
|
||||
callback/errback"""
|
||||
assert isinstance(response, (Response, Failure))
|
||||
if not isinstance(response, (Response, Failure)):
|
||||
raise TypeError(
|
||||
"Incorrect type: expected Response or Failure, got %s: %r"
|
||||
% (type(response), response)
|
||||
)
|
||||
|
||||
dfd = self._scrape2(response, request, spider) # returns spider's processed output
|
||||
dfd.addErrback(self.handle_spider_error, request, response, spider)
|
||||
@ -187,7 +191,7 @@ class Scraper:
|
||||
"""
|
||||
if isinstance(output, Request):
|
||||
self.crawler.engine.crawl(request=output, spider=spider)
|
||||
elif isinstance(output, (BaseItem, dict)):
|
||||
elif is_item(output):
|
||||
self.slot.itemproc_size += 1
|
||||
dfd = self.itemproc.process_item(output, spider)
|
||||
dfd.addBoth(self._itemproc_finished, output, response, spider)
|
||||
@ -196,10 +200,11 @@ class Scraper:
|
||||
pass
|
||||
else:
|
||||
typename = type(output).__name__
|
||||
logger.error('Spider must return Request, BaseItem, dict or None, '
|
||||
'got %(typename)r in %(request)s',
|
||||
logger.error(
|
||||
'Spider must return request, item, or None, got %(typename)r in %(request)s',
|
||||
{'request': request, 'typename': typename},
|
||||
extra={'spider': spider})
|
||||
extra={'spider': spider},
|
||||
)
|
||||
|
||||
def _log_download_errors(self, spider_failure, download_failure, request, spider):
|
||||
"""Log and silence errors that come from the engine (typically download
|
||||
|
@ -19,7 +19,7 @@ def _isiterable(possible_iterator):
|
||||
|
||||
|
||||
def _fname(f):
|
||||
return "%s.%s".format(
|
||||
return "{}.{}".format(
|
||||
f.__self__.__class__.__name__,
|
||||
f.__func__.__name__
|
||||
)
|
||||
|
@ -78,7 +78,8 @@ class Crawler:
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def crawl(self, *args, **kwargs):
|
||||
assert not self.crawling, "Crawling already taking place"
|
||||
if self.crawling:
|
||||
raise RuntimeError("Crawling already taking place")
|
||||
self.crawling = True
|
||||
|
||||
try:
|
||||
|
@ -1,4 +1,3 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import logging
|
||||
|
||||
|
@ -29,8 +29,7 @@ class CookiesMiddleware:
|
||||
|
||||
cookiejarkey = request.meta.get("cookiejar")
|
||||
jar = self.jars[cookiejarkey]
|
||||
cookies = self._get_request_cookies(jar, request)
|
||||
for cookie in cookies:
|
||||
for cookie in self._get_request_cookies(jar, request):
|
||||
jar.set_cookie_if_ok(cookie, request)
|
||||
|
||||
# set Cookie header
|
||||
@ -68,28 +67,65 @@ class CookiesMiddleware:
|
||||
msg = "Received cookies from: {}\n{}".format(response, cookies)
|
||||
logger.debug(msg, extra={'spider': spider})
|
||||
|
||||
def _format_cookie(self, cookie):
|
||||
# build cookie string
|
||||
cookie_str = '%s=%s' % (cookie['name'], cookie['value'])
|
||||
|
||||
if cookie.get('path', None):
|
||||
cookie_str += '; Path=%s' % cookie['path']
|
||||
if cookie.get('domain', None):
|
||||
cookie_str += '; Domain=%s' % cookie['domain']
|
||||
def _format_cookie(self, cookie, request):
|
||||
"""
|
||||
Given a dict consisting of cookie components, return its string representation.
|
||||
Decode from bytes if necessary.
|
||||
"""
|
||||
decoded = {}
|
||||
for key in ("name", "value", "path", "domain"):
|
||||
if not cookie.get(key):
|
||||
if key in ("name", "value"):
|
||||
msg = "Invalid cookie found in request {}: {} ('{}' is missing)"
|
||||
logger.warning(msg.format(request, cookie, key))
|
||||
return
|
||||
continue
|
||||
if isinstance(cookie[key], str):
|
||||
decoded[key] = cookie[key]
|
||||
else:
|
||||
try:
|
||||
decoded[key] = cookie[key].decode("utf8")
|
||||
except UnicodeDecodeError:
|
||||
logger.warning("Non UTF-8 encoded cookie found in request %s: %s",
|
||||
request, cookie)
|
||||
decoded[key] = cookie[key].decode("latin1", errors="replace")
|
||||
|
||||
cookie_str = "{}={}".format(decoded.pop("name"), decoded.pop("value"))
|
||||
for key, value in decoded.items(): # path, domain
|
||||
cookie_str += "; {}={}".format(key.capitalize(), value)
|
||||
return cookie_str
|
||||
|
||||
def _get_request_cookies(self, jar, request):
|
||||
if isinstance(request.cookies, dict):
|
||||
cookie_list = [
|
||||
{'name': k, 'value': v}
|
||||
for k, v in request.cookies.items()
|
||||
]
|
||||
else:
|
||||
cookie_list = request.cookies
|
||||
|
||||
cookies = [self._format_cookie(x) for x in cookie_list]
|
||||
headers = {'Set-Cookie': cookies}
|
||||
response = Response(request.url, headers=headers)
|
||||
|
||||
"""
|
||||
Extract cookies from a Request. Values from the `Request.cookies` attribute
|
||||
take precedence over values from the `Cookie` request header.
|
||||
"""
|
||||
def get_cookies_from_header(jar, request):
|
||||
cookie_header = request.headers.get("Cookie")
|
||||
if not cookie_header:
|
||||
return []
|
||||
cookie_gen_bytes = (s.strip() for s in cookie_header.split(b";"))
|
||||
cookie_list_unicode = []
|
||||
for cookie_bytes in cookie_gen_bytes:
|
||||
try:
|
||||
cookie_unicode = cookie_bytes.decode("utf8")
|
||||
except UnicodeDecodeError:
|
||||
logger.warning("Non UTF-8 encoded cookie found in request %s: %s",
|
||||
request, cookie_bytes)
|
||||
cookie_unicode = cookie_bytes.decode("latin1", errors="replace")
|
||||
cookie_list_unicode.append(cookie_unicode)
|
||||
response = Response(request.url, headers={"Set-Cookie": cookie_list_unicode})
|
||||
return jar.make_cookies(response, request)
|
||||
|
||||
def get_cookies_from_attribute(jar, request):
|
||||
if not request.cookies:
|
||||
return []
|
||||
elif isinstance(request.cookies, dict):
|
||||
cookies = ({"name": k, "value": v} for k, v in request.cookies.items())
|
||||
else:
|
||||
cookies = request.cookies
|
||||
formatted = filter(None, (self._format_cookie(c, request) for c in cookies))
|
||||
response = Response(request.url, headers={"Set-Cookie": formatted})
|
||||
return jar.make_cookies(response, request)
|
||||
|
||||
return get_cookies_from_header(jar, request) + get_cookies_from_attribute(jar, request)
|
||||
|
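A hedged illustration of the merge implemented above: cookies may come from the Request.cookies attribute or from a raw Cookie header, and per the docstring the attribute wins on conflicts:

    from scrapy import Request

    request = Request(
        "https://example.com",
        cookies={"currency": "EUR"},                     # parsed by get_cookies_from_attribute()
        headers={"Cookie": "currency=USD; theme=dark"},  # parsed by get_cookies_from_header()
    )
    # After CookiesMiddleware.process_request, the jar holds currency=EUR and theme=dark.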
@ -60,11 +60,14 @@ class RedirectMiddleware(BaseRedirectMiddleware):
|
||||
Handle redirection of requests based on response status
|
||||
and meta-refresh html tag.
|
||||
"""
|
||||
|
||||
def process_response(self, request, response, spider):
|
||||
if (request.meta.get('dont_redirect', False) or
|
||||
response.status in getattr(spider, 'handle_httpstatus_list', []) or
|
||||
response.status in request.meta.get('handle_httpstatus_list', []) or
|
||||
request.meta.get('handle_httpstatus_all', False)):
|
||||
if (
|
||||
request.meta.get('dont_redirect', False)
|
||||
or response.status in getattr(spider, 'handle_httpstatus_list', [])
|
||||
or response.status in request.meta.get('handle_httpstatus_list', [])
|
||||
or request.meta.get('handle_httpstatus_all', False)
|
||||
):
|
||||
return response
|
||||
|
||||
allowed_status = (301, 302, 303, 307, 308)
|
||||
|
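The per-request opt-outs checked above are driven by request meta; a short sketch (URL hypothetical):

    import scrapy

    class NoRedirectSpider(scrapy.Spider):
        name = "no_redirect"

        def start_requests(self):
            yield scrapy.Request(
                "https://example.com/old-path",
                meta={
                    "dont_redirect": True,                 # keep the 3xx response itself
                    "handle_httpstatus_list": [301, 302],  # let the callback see these statuses
                },
                callback=self.parse_redirect,
            )

        def parse_redirect(self, response):
            self.logger.info("Got %s -> %s", response.status, response.headers.get(b"Location"))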
@ -12,9 +12,15 @@ once the spider has finished crawling all regular (non failed) pages.
|
||||
import logging
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.internet.error import TimeoutError, DNSLookupError, \
|
||||
ConnectionRefusedError, ConnectionDone, ConnectError, \
|
||||
ConnectionLost, TCPTimedOutError
|
||||
from twisted.internet.error import (
|
||||
ConnectError,
|
||||
ConnectionDone,
|
||||
ConnectionLost,
|
||||
ConnectionRefusedError,
|
||||
DNSLookupError,
|
||||
TCPTimedOutError,
|
||||
TimeoutError,
|
||||
)
|
||||
from twisted.web.client import ResponseFailed
|
||||
|
||||
from scrapy.exceptions import NotConfigured
|
||||
|
@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter):
|
||||
def log(self, request, spider):
|
||||
if self.debug:
|
||||
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
|
||||
args = {'request': request, 'referer': referer_str(request) }
|
||||
args = {'request': request, 'referer': referer_str(request)}
|
||||
self.logger.debug(msg, args, extra={'spider': spider})
|
||||
elif self.logdupes:
|
||||
msg = ("Filtered duplicate request: %(request)s"
|
||||
|
@ -41,6 +41,18 @@ class CloseSpider(Exception):
|
||||
self.reason = reason


class StopDownload(Exception):
    """
    Stop the download of the body for a given response.
    The 'fail' boolean parameter indicates whether or not the resulting partial response
    should be handled by the request errback. Note that 'fail' is a keyword-only argument.
    """

    def __init__(self, *, fail=True):
        super().__init__()
        self.fail = fail
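With the default fail=True, the partial response travels to the request errback wrapped in a Failure; a hedged sketch of reading it back (the .response attribute is the one attached by the HTTP/1.1 download handler earlier in this diff):

    from scrapy.exceptions import StopDownload

    def errback(self, failure):
        # assumed to be registered via Request(..., errback=self.errback)
        if failure.check(StopDownload):
            partial = failure.value.response  # Response built from the bytes received so far
            self.logger.info("Stopped early with %d bytes", len(partial.body))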
# Items
|
||||
|
||||
|
||||
@ -59,6 +71,7 @@ class NotSupported(Exception):
|
||||
|
||||
class UsageError(Exception):
|
||||
"""To indicate a command-line usage error"""
|
||||
|
||||
def __init__(self, *a, **kw):
|
||||
self.print_help = kw.pop('print_help', True)
|
||||
super(UsageError, self).__init__(*a, **kw)
|
||||
|
@ -4,16 +4,18 @@ Item Exporters are used to export/serialize items into different formats.
|
||||
|
||||
import csv
|
||||
import io
|
||||
import pprint
|
||||
import marshal
|
||||
import warnings
|
||||
import pickle
|
||||
import pprint
|
||||
import warnings
|
||||
from xml.sax.saxutils import XMLGenerator
|
||||
|
||||
from scrapy.utils.serialize import ScrapyJSONEncoder
|
||||
from scrapy.utils.python import to_bytes, to_unicode, is_listlike
|
||||
from scrapy.item import BaseItem
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||
from scrapy.item import _BaseItem
|
||||
from scrapy.utils.python import is_listlike, to_bytes, to_unicode
|
||||
from scrapy.utils.serialize import ScrapyJSONEncoder
|
||||
|
||||
|
||||
__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
|
||||
@ -56,11 +58,14 @@ class BaseItemExporter:
|
||||
"""Return the fields to export as an iterable of tuples
|
||||
(name, serialized_value)
|
||||
"""
|
||||
item = ItemAdapter(item)
|
||||
|
||||
if include_empty is None:
|
||||
include_empty = self.export_empty_fields
|
||||
|
||||
if self.fields_to_export is None:
|
||||
if include_empty and not isinstance(item, dict):
|
||||
field_iter = item.fields.keys()
|
||||
if include_empty:
|
||||
field_iter = item.field_names()
|
||||
else:
|
||||
field_iter = item.keys()
|
||||
else:
|
||||
@ -71,8 +76,8 @@ class BaseItemExporter:
|
||||
|
||||
for field_name in field_iter:
|
||||
if field_name in item:
|
||||
field = {} if isinstance(item, dict) else item.fields[field_name]
|
||||
value = self.serialize_field(field, field_name, item[field_name])
|
||||
field_meta = item.get_field_meta(field_name)
|
||||
value = self.serialize_field(field_meta, field_name, item[field_name])
|
||||
else:
|
||||
value = default_value
|
||||
|
||||
@ -250,7 +255,7 @@ class CsvItemExporter(BaseItemExporter):
|
||||
|
||||
class PickleItemExporter(BaseItemExporter):
|
||||
|
||||
def __init__(self, file, protocol=2, **kwargs):
|
||||
def __init__(self, file, protocol=4, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.file = file
|
||||
self.protocol = protocol
|
||||
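A short usage sketch for the exporter whose default pickle protocol changes above; consumers that must stay compatible with very old unpicklers can still pass protocol=2 explicitly:

    from scrapy.exporters import PickleItemExporter

    with open("items.pickle", "wb") as f:
        exporter = PickleItemExporter(f)      # pickle protocol 4 by default now
        exporter.start_exporting()
        exporter.export_item({"name": "example", "price": 42})
        exporter.finish_exporting()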
@ -297,6 +302,7 @@ class PythonItemExporter(BaseItemExporter):
|
||||
|
||||
.. _msgpack: https://pypi.org/project/msgpack/
|
||||
"""
|
||||
|
||||
def _configure(self, options, dont_fail=False):
|
||||
self.binary = options.pop('binary', True)
|
||||
super(PythonItemExporter, self)._configure(options, dont_fail)
|
||||
@ -312,24 +318,24 @@ class PythonItemExporter(BaseItemExporter):
|
||||
return serializer(value)
|
||||
|
||||
def _serialize_value(self, value):
|
||||
if isinstance(value, BaseItem):
|
||||
if isinstance(value, _BaseItem):
|
||||
return self.export_item(value)
|
||||
if isinstance(value, dict):
|
||||
return dict(self._serialize_dict(value))
|
||||
if is_listlike(value):
|
||||
elif is_item(value):
|
||||
return dict(self._serialize_item(value))
|
||||
elif is_listlike(value):
|
||||
return [self._serialize_value(v) for v in value]
|
||||
encode_func = to_bytes if self.binary else to_unicode
|
||||
if isinstance(value, (str, bytes)):
|
||||
return encode_func(value, encoding=self.encoding)
|
||||
return value
|
||||
|
||||
def _serialize_dict(self, value):
|
||||
for key, val in value.items():
|
||||
def _serialize_item(self, item):
|
||||
for key, value in ItemAdapter(item).items():
|
||||
key = to_bytes(key) if self.binary else key
|
||||
yield key, self._serialize_value(val)
|
||||
yield key, self._serialize_value(value)
|
||||
|
||||
def export_item(self, item):
|
||||
result = dict(self._get_serialized_fields(item))
|
||||
if self.binary:
|
||||
result = dict(self._serialize_dict(result))
|
||||
result = dict(self._serialize_item(result))
|
||||
return result
|
||||
|
@ -270,18 +270,29 @@ class FeedExporter:
|
||||
if not slot.itemcount and not slot.store_empty:
|
||||
# We need to call slot.storage.store nonetheless to get the file
|
||||
# properly closed.
|
||||
return defer.maybeDeferred(slot.storage.store, slot.file)
|
||||
d = defer.maybeDeferred(slot.storage.store, slot.file)
|
||||
deferred_list.append(d)
|
||||
continue
|
||||
slot.finish_exporting()
|
||||
logfmt = "%s %%(format)s feed (%%(itemcount)d items) in: %%(uri)s"
|
||||
log_args = {'format': slot.format,
|
||||
'itemcount': slot.itemcount,
|
||||
'uri': slot.uri}
|
||||
d = defer.maybeDeferred(slot.storage.store, slot.file)
|
||||
d.addCallback(lambda _: logger.info(logfmt % "Stored", log_args,
|
||||
extra={'spider': spider}))
|
||||
d.addErrback(lambda f: logger.error(logfmt % "Error storing", log_args,
|
||||
exc_info=failure_to_exc_info(f),
|
||||
extra={'spider': spider}))
|
||||
|
||||
# Use `largs=log_args` to copy log_args into function's scope
|
||||
# instead of using `log_args` from the outer scope
|
||||
d.addCallback(
|
||||
lambda _, largs=log_args: logger.info(
|
||||
logfmt % "Stored", largs, extra={'spider': spider}
|
||||
)
|
||||
)
|
||||
d.addErrback(
|
||||
lambda f, largs=log_args: logger.error(
|
||||
logfmt % "Error storing", largs,
|
||||
exc_info=failure_to_exc_info(f), extra={'spider': spider}
|
||||
)
|
||||
)
|
||||
deferred_list.append(d)
|
||||
return defer.DeferredList(deferred_list) if deferred_list else None
|
||||
|
||||
|
@ -46,9 +46,10 @@ class RFC2616Policy:
|
||||
def __init__(self, settings):
|
||||
self.always_store = settings.getbool('HTTPCACHE_ALWAYS_STORE')
|
||||
self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
|
||||
self.ignore_response_cache_controls = [to_bytes(cc) for cc in
|
||||
settings.getlist('HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS')]
|
||||
self._cc_parsed = WeakKeyDictionary()
|
||||
self.ignore_response_cache_controls = [
|
||||
to_bytes(cc) for cc in settings.getlist('HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS')
|
||||
]
|
||||
|
||||
def _parse_cachecontrol(self, r):
|
||||
if r not in self._cc_parsed:
|
||||
@ -250,7 +251,7 @@ class DbmCacheStorage:
|
||||
'headers': dict(response.headers),
|
||||
'body': response.body,
|
||||
}
|
||||
self.db['%s_data' % key] = pickle.dumps(data, protocol=2)
|
||||
self.db['%s_data' % key] = pickle.dumps(data, protocol=4)
|
||||
self.db['%s_time' % key] = str(time())
|
||||
|
||||
def _read_data(self, spider, request):
|
||||
@ -317,7 +318,7 @@ class FilesystemCacheStorage:
|
||||
with self._open(os.path.join(rpath, 'meta'), 'wb') as f:
|
||||
f.write(to_bytes(repr(metadata)))
|
||||
with self._open(os.path.join(rpath, 'pickled_meta'), 'wb') as f:
|
||||
pickle.dump(metadata, f, protocol=2)
|
||||
pickle.dump(metadata, f, protocol=4)
|
||||
with self._open(os.path.join(rpath, 'response_headers'), 'wb') as f:
|
||||
f.write(headers_dict_to_raw(response.headers))
|
||||
with self._open(os.path.join(rpath, 'response_body'), 'wb') as f:
|
||||
|
@ -26,7 +26,7 @@ class SpiderState:
|
||||
def spider_closed(self, spider):
|
||||
if self.jobdir:
|
||||
with open(self.statefn, 'wb') as f:
|
||||
pickle.dump(spider.state, f, protocol=2)
|
||||
pickle.dump(spider.state, f, protocol=4)
|
||||
|
||||
def spider_opened(self, spider):
|
||||
if self.jobdir and os.path.exists(self.statefn):
|
||||
|
@ -76,8 +76,10 @@ class TelnetConsole(protocol.ServerFactory):
|
||||
"""An implementation of IPortal"""
|
||||
@defers
|
||||
def login(self_, credentials, mind, *interfaces):
|
||||
if not (credentials.username == self.username.encode('utf8') and
|
||||
credentials.checkPassword(self.password.encode('utf8'))):
|
||||
if not (
|
||||
credentials.username == self.username.encode('utf8')
|
||||
and credentials.checkPassword(self.password.encode('utf8'))
|
||||
):
|
||||
raise ValueError("Invalid credentials")
|
||||
|
||||
protocol = telnet.TelnetBootstrapProtocol(
|
||||
|
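The credentials checked above come from project settings; a hedged sketch using the documented telnet console setting names:

    # settings.py
    TELNETCONSOLE_USERNAME = "scrapy"
    TELNETCONSOLE_PASSWORD = "s3cret"  # if unset, a random password is printed in the log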
@ -24,7 +24,8 @@ class Request(object_ref):
|
||||
self.method = str(method).upper()
|
||||
self._set_url(url)
|
||||
self._set_body(body)
|
||||
assert isinstance(priority, int), "Request priority not an integer: %r" % priority
|
||||
if not isinstance(priority, int):
|
||||
raise TypeError("Request priority not an integer: %r" % priority)
|
||||
self.priority = priority
|
||||
|
||||
if callback is not None and not callable(callback):
|
||||
@ -129,6 +130,9 @@ class Request(object_ref):
|
||||
:class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
|
||||
may modify the :class:`~scrapy.http.Request` object.
|
||||
|
||||
To translate a cURL command into a Scrapy request,
|
||||
you may use `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_.
|
||||
|
||||
"""
|
||||
request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
|
||||
request_kwargs.update(kwargs)
|
||||
|
@ -178,12 +178,11 @@ def _get_clickable(clickdata, form):
|
||||
if the latter is given. If not, it returns the first
|
||||
clickable element found
|
||||
"""
|
||||
clickables = [
|
||||
el for el in form.xpath(
|
||||
clickables = list(form.xpath(
|
||||
'descendant::input[re:test(@type, "^(submit|image)$", "i")]'
|
||||
'|descendant::button[not(@type) or re:test(@type, "^submit$", "i")]',
|
||||
namespaces={"re": "http://exslt.org/regular-expressions"})
|
||||
]
|
||||
namespaces={"re": "http://exslt.org/regular-expressions"}
|
||||
))
|
||||
if not clickables:
|
||||
return
|
||||
|
||||
|
@ -17,7 +17,8 @@ from scrapy.utils.trackref import object_ref
|
||||
|
||||
class Response(object_ref):
|
||||
|
||||
def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None, certificate=None):
|
||||
def __init__(self, url, status=200, headers=None, body=b'', flags=None,
|
||||
request=None, certificate=None, ip_address=None):
|
||||
self.headers = Headers(headers or {})
|
||||
self.status = int(status)
|
||||
self._set_body(body)
|
||||
@ -25,6 +26,7 @@ class Response(object_ref):
|
||||
self.request = request
|
||||
self.flags = [] if flags is None else list(flags)
|
||||
self.certificate = certificate
|
||||
self.ip_address = ip_address
|
||||
|
||||
@property
|
||||
def cb_kwargs(self):
|
||||
@ -87,7 +89,8 @@ class Response(object_ref):
|
||||
"""Create a new Response with the same attributes except for those
|
||||
given new values.
|
||||
"""
|
||||
for x in ['url', 'status', 'headers', 'body', 'request', 'flags', 'certificate']:
|
||||
for x in ['url', 'status', 'headers', 'body',
|
||||
'request', 'flags', 'certificate', 'ip_address']:
|
||||
kwargs.setdefault(x, getattr(self, x))
|
||||
cls = kwargs.pop('cls', self.__class__)
|
||||
return cls(*args, **kwargs)
|
||||
|
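A hedged sketch of consuming the attribute threaded through here: ip_address is an ipaddress.IPv4Address/IPv6Address set by the download handler, and may be None for responses that never hit the network (e.g. cache hits):

    def parse(self, response):
        self.logger.info("%s was served from %s", response.url, response.ip_address)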
@ -5,6 +5,8 @@ discovering (through HTTP headers) to base Response class.
|
||||
See documentation in docs/topics/request-response.rst
|
||||
"""
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from contextlib import suppress
|
||||
from typing import Generator
|
||||
from urllib.parse import urljoin
|
||||
@ -14,15 +16,19 @@ from w3lib.encoding import (html_body_declared_encoding, html_to_unicode,
|
||||
http_content_type_encoding, resolve_encoding)
|
||||
from w3lib.html import strip_html5_whitespace
|
||||
|
||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||
from scrapy.http import Request
|
||||
from scrapy.http.response import Response
|
||||
from scrapy.utils.python import memoizemethod_noargs, to_unicode
|
||||
from scrapy.utils.response import get_base_url
|
||||
|
||||
_NONE = object()
|
||||
|
||||
|
||||
class TextResponse(Response):
|
||||
|
||||
_DEFAULT_ENCODING = 'ascii'
|
||||
_cached_decoded_json = _NONE
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._encoding = kwargs.pop('encoding', None)
|
||||
@ -61,8 +67,21 @@ class TextResponse(Response):
|
||||
|
||||
def body_as_unicode(self):
|
||||
"""Return body as unicode"""
|
||||
warnings.warn('Response.body_as_unicode() is deprecated, '
|
||||
'please use Response.text instead.',
|
||||
ScrapyDeprecationWarning, stacklevel=2)
|
||||
return self.text
|

def json(self):
    """
    .. versionadded:: 2.2

    Deserialize a JSON document to a Python object.
    """
    if self._cached_decoded_json is _NONE:
        self._cached_decoded_json = json.loads(self.text)
    return self._cached_decoded_json

@property
|
||||
def text(self):
|
||||
""" Body as unicode """
|
||||
|
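A short callback sketch using the new helper; it is only valid for JSON responses, and the parsed object is cached on the response as shown above (the payload shape is hypothetical):

    def parse(self, response):
        data = response.json()              # equivalent to json.loads(response.text), cached
        for user in data.get("users", []):
            yield {"name": user["name"]}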
@ -14,28 +14,39 @@ from scrapy.utils.deprecate import ScrapyDeprecationWarning
|
||||
from scrapy.utils.trackref import object_ref
|
||||
|
||||
|
||||
class BaseItem(object_ref):
|
||||
"""Base class for all scraped items.
|
||||
|
||||
In Scrapy, an object is considered an *item* if it is an instance of either
|
||||
:class:`BaseItem` or :class:`dict`. For example, when the output of a
|
||||
spider callback is evaluated, only instances of :class:`BaseItem` or
|
||||
:class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`.
|
||||
|
||||
If you need instances of a custom class to be considered items by Scrapy,
|
||||
you must inherit from either :class:`BaseItem` or :class:`dict`.
|
||||
|
||||
Unlike instances of :class:`dict`, instances of :class:`BaseItem` may be
|
||||
:ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks.
|
||||
class _BaseItem(object_ref):
|
||||
"""
|
||||
Temporary class used internally to avoid the deprecation
|
||||
warning raised by isinstance checks using BaseItem.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class _BaseItemMeta(ABCMeta):
|
||||
def __instancecheck__(cls, instance):
|
||||
if cls is BaseItem:
|
||||
warn('scrapy.item.BaseItem is deprecated, please use scrapy.item.Item instead',
|
||||
ScrapyDeprecationWarning, stacklevel=2)
|
||||
return super().__instancecheck__(instance)
|
||||
|
||||
|
||||
class BaseItem(_BaseItem, metaclass=_BaseItemMeta):
|
||||
"""
|
||||
Deprecated, please use :class:`scrapy.item.Item` instead
|
||||
"""
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if issubclass(cls, BaseItem) and not issubclass(cls, (Item, DictItem)):
|
||||
warn('scrapy.item.BaseItem is deprecated, please use scrapy.item.Item instead',
|
||||
ScrapyDeprecationWarning, stacklevel=2)
|
||||
return super(BaseItem, cls).__new__(cls, *args, **kwargs)
|
||||
|
||||
|
||||
class Field(dict):
|
||||
"""Container of field metadata"""
|
||||
|
||||
|
||||
class ItemMeta(ABCMeta):
|
||||
class ItemMeta(_BaseItemMeta):
|
||||
"""Metaclass_ of :class:`Item` that handles field definitions.
|
||||
|
||||
.. _metaclass: https://realpython.com/python-metaclasses
|
||||
@ -68,8 +79,7 @@ class DictItem(MutableMapping, BaseItem):
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if issubclass(cls, DictItem) and not issubclass(cls, Item):
|
||||
warn('scrapy.item.DictItem is deprecated, please use '
|
||||
'scrapy.item.Item instead',
|
||||
warn('scrapy.item.DictItem is deprecated, please use scrapy.item.Item instead',
|
||||
ScrapyDeprecationWarning, stacklevel=2)
|
||||
return super(DictItem, cls).__new__(cls, *args, **kwargs)
|
||||
|
||||
@ -86,8 +96,7 @@ class DictItem(MutableMapping, BaseItem):
|
||||
if key in self.fields:
|
||||
self._values[key] = value
|
||||
else:
|
||||
raise KeyError("%s does not support field: %s" %
|
||||
(self.__class__.__name__, key))
|
||||
raise KeyError("%s does not support field: %s" % (self.__class__.__name__, key))
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._values[key]
|
||||
@ -99,8 +108,7 @@ class DictItem(MutableMapping, BaseItem):
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if not name.startswith('_'):
|
||||
raise AttributeError("Use item[%r] = %r to set field value" %
|
||||
(name, value))
|
||||
raise AttributeError("Use item[%r] = %r to set field value" % (name, value))
|
||||
super(DictItem, self).__setattr__(name, value)
|
||||
|
||||
def __len__(self):
|
||||
@ -121,12 +129,30 @@ class DictItem(MutableMapping, BaseItem):
|
||||
return self.__class__(self)
|
||||
|
||||
def deepcopy(self):
|
||||
"""Return a `deep copy`_ of this item.
|
||||
|
||||
.. _deep copy: https://docs.python.org/library/copy.html#copy.deepcopy
|
||||
"""Return a :func:`~copy.deepcopy` of this item.
|
||||
"""
|
||||
return deepcopy(self)
|
||||
|
||||
|
||||
class Item(DictItem, metaclass=ItemMeta):
|
||||
pass
|
||||
"""
|
||||
Base class for scraped items.
|
||||
|
||||
In Scrapy, an object is considered an ``item`` if it is an instance of either
|
||||
:class:`Item` or :class:`dict`, or any subclass. For example, when the output of a
|
||||
spider callback is evaluated, only instances of :class:`Item` or
|
||||
:class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`.
|
||||
|
||||
If you need instances of a custom class to be considered items by Scrapy,
|
||||
you must inherit from either :class:`Item` or :class:`dict`.
|
||||
|
||||
Items must declare :class:`Field` attributes, which are processed and stored
|
||||
in the ``fields`` attribute. This restricts the set of allowed field names
|
||||
and prevents typos, raising ``KeyError`` when referring to undefined fields.
|
||||
Additionally, fields can be used to define metadata and control the way
|
||||
data is processed internally. Please refer to the :ref:`documentation
|
||||
about fields <topics-items-fields>` for additional information.
|
||||
|
||||
Unlike instances of :class:`dict`, instances of :class:`Item` may be
|
||||
:ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks.
|
||||
"""
|
||||
|
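A minimal declaration matching the docstring above; field metadata such as a serializer is stored on the Field and picked up by exporters:

    import scrapy

    class Product(scrapy.Item):
        name = scrapy.Field()
        price = scrapy.Field(serializer=float)

    item = Product(name="widget", price="9.99")
    item["name"] = "gadget"   # assigning an undeclared field would raise KeyError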
@ -45,8 +45,14 @@ IGNORED_EXTENSIONS = [
|
||||
|
||||
|
||||
_re_type = type(re.compile("", 0))
|
||||
_matches = lambda url, regexs: any(r.search(url) for r in regexs)
|
||||
_is_valid_url = lambda url: url.split('://', 1)[0] in {'http', 'https', 'file', 'ftp'}
|
||||
|
||||
|
||||
def _matches(url, regexs):
|
||||
return any(r.search(url) for r in regexs)
|
||||
|
||||
|
||||
def _is_valid_url(url):
|
||||
return url.split('://', 1)[0] in {'http', 'https', 'file', 'ftp'}
|
||||
|
||||
|
||||
class FilteringLinkExtractor:
|
||||
@ -55,8 +61,7 @@ class FilteringLinkExtractor:
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
|
||||
if (issubclass(cls, FilteringLinkExtractor) and
|
||||
not issubclass(cls, LxmlLinkExtractor)):
|
||||
if issubclass(cls, FilteringLinkExtractor) and not issubclass(cls, LxmlLinkExtractor):
|
||||
warn('scrapy.linkextractors.FilteringLinkExtractor is deprecated, '
|
||||
'please use scrapy.linkextractors.LinkExtractor instead',
|
||||
ScrapyDeprecationWarning, stacklevel=2)
|
||||
@ -128,4 +133,4 @@ class FilteringLinkExtractor:
|
||||
|
||||
|
||||
# Top-level imports
|
||||
from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor as LinkExtractor # noqa: F401
|
||||
from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor as LinkExtractor
|
||||
|
@ -1,6 +1,8 @@
|
||||
"""
|
||||
Link extractor based on lxml.html
|
||||
"""
|
||||
import operator
|
||||
from functools import partial
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import lxml.etree as etree
|
||||
@ -8,10 +10,10 @@ from w3lib.html import strip_html5_whitespace
|
||||
from w3lib.url import canonicalize_url, safe_url_string
|
||||
|
||||
from scrapy.link import Link
|
||||
from scrapy.linkextractors import FilteringLinkExtractor
|
||||
from scrapy.utils.misc import arg_to_iter, rel_has_nofollow
|
||||
from scrapy.utils.python import unique as unique_list
|
||||
from scrapy.utils.response import get_base_url
|
||||
from scrapy.linkextractors import FilteringLinkExtractor
|
||||
|
||||
|
||||
# from lxml/src/lxml/html/__init__.py
|
||||
@ -27,19 +29,24 @@ def _nons(tag):
|
||||
return tag
|
||||
|
||||
|
||||
def _identity(x):
|
||||
return x
|
||||
|
||||
|
||||
def _canonicalize_link_url(link):
|
||||
return canonicalize_url(link.url, keep_fragments=True)
|
||||
|
||||
|
||||
class LxmlParserLinkExtractor:
|
||||
def __init__(self, tag="a", attr="href", process=None, unique=False,
|
||||
strip=True, canonicalized=False):
|
||||
self.scan_tag = tag if callable(tag) else lambda t: t == tag
|
||||
self.scan_attr = attr if callable(attr) else lambda a: a == attr
|
||||
self.process_attr = process if callable(process) else lambda v: v
|
||||
def __init__(
|
||||
self, tag="a", attr="href", process=None, unique=False, strip=True, canonicalized=False
|
||||
):
|
||||
self.scan_tag = tag if callable(tag) else partial(operator.eq, tag)
|
||||
self.scan_attr = attr if callable(attr) else partial(operator.eq, attr)
|
||||
self.process_attr = process if callable(process) else _identity
|
||||
self.unique = unique
|
||||
self.strip = strip
|
||||
if canonicalized:
|
||||
self.link_key = lambda link: link.url
|
||||
else:
|
||||
self.link_key = lambda link: canonicalize_url(link.url,
|
||||
keep_fragments=True)
|
||||
self.link_key = operator.attrgetter("url") if canonicalized else _canonicalize_link_url
|
||||
|
||||
def _iter_links(self, document):
|
||||
for el in document.iter(etree.Element):
|
||||
@ -93,27 +100,44 @@ class LxmlParserLinkExtractor:
|
||||
|
||||
class LxmlLinkExtractor(FilteringLinkExtractor):
|
||||
|
||||
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
|
||||
tags=('a', 'area'), attrs=('href',), canonicalize=False,
|
||||
unique=True, process_value=None, deny_extensions=None, restrict_css=(),
|
||||
strip=True, restrict_text=None):
|
||||
def __init__(
|
||||
self,
|
||||
allow=(),
|
||||
deny=(),
|
||||
allow_domains=(),
|
||||
deny_domains=(),
|
||||
restrict_xpaths=(),
|
||||
tags=('a', 'area'),
|
||||
attrs=('href',),
|
||||
canonicalize=False,
|
||||
unique=True,
|
||||
process_value=None,
|
||||
deny_extensions=None,
|
||||
restrict_css=(),
|
||||
strip=True,
|
||||
restrict_text=None,
|
||||
):
|
||||
tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
|
||||
tag_func = lambda x: x in tags
|
||||
attr_func = lambda x: x in attrs
|
||||
lx = LxmlParserLinkExtractor(
|
||||
tag=tag_func,
|
||||
attr=attr_func,
|
||||
tag=partial(operator.contains, tags),
|
||||
attr=partial(operator.contains, attrs),
|
||||
unique=unique,
|
||||
process=process_value,
|
||||
strip=strip,
|
||||
canonicalized=canonicalize
|
||||
)
|
||||
|
||||
super(LxmlLinkExtractor, self).__init__(lx, allow=allow, deny=deny,
|
||||
allow_domains=allow_domains, deny_domains=deny_domains,
|
||||
restrict_xpaths=restrict_xpaths, restrict_css=restrict_css,
|
||||
canonicalize=canonicalize, deny_extensions=deny_extensions,
|
||||
restrict_text=restrict_text)
|
||||
super(LxmlLinkExtractor, self).__init__(
|
||||
link_extractor=lx,
|
||||
allow=allow,
|
||||
deny=deny,
|
||||
allow_domains=allow_domains,
|
||||
deny_domains=deny_domains,
|
||||
restrict_xpaths=restrict_xpaths,
|
||||
restrict_css=restrict_css,
|
||||
canonicalize=canonicalize,
|
||||
deny_extensions=deny_extensions,
|
||||
restrict_text=restrict_text,
|
||||
)
|
||||
|
||||
def extract_links(self, response):
|
||||
"""Returns a list of :class:`~scrapy.link.Link` objects from the
|
||||
@ -126,9 +150,11 @@ class LxmlLinkExtractor(FilteringLinkExtractor):
|
||||
"""
|
||||
base_url = get_base_url(response)
|
||||
if self.restrict_xpaths:
|
||||
docs = [subdoc
|
||||
docs = [
|
||||
subdoc
|
||||
for x in self.restrict_xpaths
|
||||
for subdoc in response.xpath(x)]
|
||||
for subdoc in response.xpath(x)
|
||||
]
|
||||
else:
|
||||
docs = [response.selector]
|
||||
all_links = []
|
||||
|
@ -6,6 +6,8 @@ See documentation in docs/topics/loaders.rst
|
||||
from collections import defaultdict
|
||||
from contextlib import suppress
|
||||
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
from scrapy.item import Item
|
||||
from scrapy.loader.common import wrap_loader_context
|
||||
from scrapy.loader.processors import Identity
|
||||
@ -44,7 +46,7 @@ class ItemLoader:
|
||||
self._local_item = context['item'] = item
|
||||
self._local_values = defaultdict(list)
|
||||
# values from initial item
|
||||
for field_name, value in item.items():
|
||||
for field_name, value in ItemAdapter(item).items():
|
||||
self._values[field_name] += arg_to_iter(value)
|
||||
|
||||
@property
|
||||
@ -127,13 +129,12 @@ class ItemLoader:
|
||||
return value
|
||||
|
||||
def load_item(self):
|
||||
item = self.item
|
||||
adapter = ItemAdapter(self.item)
|
||||
for field_name in tuple(self._values):
|
||||
value = self.get_output_value(field_name)
|
||||
if value is not None:
|
||||
item[field_name] = value
|
||||
|
||||
return item
|
||||
adapter[field_name] = value
|
||||
return adapter.item
|
||||
|
||||
def get_output_value(self, field_name):
|
||||
proc = self.get_output_processor(field_name)
|
||||
@ -174,11 +175,8 @@ class ItemLoader:
|
||||
value, type(e).__name__, str(e)))
|
||||
|
||||
def _get_item_field_attr(self, field_name, key, default=None):
|
||||
if isinstance(self.item, Item):
|
||||
value = self.item.fields[field_name].get(key, default)
|
||||
else:
|
||||
value = default
|
||||
return value
|
||||
field_meta = ItemAdapter(self.item).get_field_meta(field_name)
|
||||
return field_meta.get(key, default)
|
||||
|
||||
def _check_selector_method(self):
|
||||
if self.selector is None:
|
||||
|
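Because load_item() now goes through ItemAdapter, loaders can populate item types other than scrapy.Item. A hedged sketch with a dataclass (requires the itemadapter dependency introduced here; the CSS selectors are hypothetical):

    from dataclasses import dataclass

    from scrapy.loader import ItemLoader
    from scrapy.loader.processors import TakeFirst

    @dataclass
    class Book:
        title: str = ""
        price: str = ""

    class BookLoader(ItemLoader):
        default_output_processor = TakeFirst()

    def parse(self, response):
        loader = BookLoader(item=Book(), response=response)
        loader.add_css("title", "h1::text")
        loader.add_css("price", ".price::text")
        yield loader.load_item()   # returns the populated Book instance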
@ -28,8 +28,10 @@ def _to_bytes_or_none(text):
|
||||
|
||||
|
||||
class MailSender:
|
||||
def __init__(self, smtphost='localhost', mailfrom='scrapy@localhost',
|
||||
smtpuser=None, smtppass=None, smtpport=25, smtptls=False, smtpssl=False, debug=False):
|
||||
def __init__(
|
||||
self, smtphost='localhost', mailfrom='scrapy@localhost', smtpuser=None,
|
||||
smtppass=None, smtpport=25, smtptls=False, smtpssl=False, debug=False
|
||||
):
|
||||
self.smtphost = smtphost
|
||||
self.smtpport = smtpport
|
||||
self.smtpuser = _to_bytes_or_none(smtpuser)
|
||||
@ -41,9 +43,15 @@ class MailSender:
|
||||
|
||||
@classmethod
|
||||
def from_settings(cls, settings):
|
||||
return cls(settings['MAIL_HOST'], settings['MAIL_FROM'], settings['MAIL_USER'],
|
||||
settings['MAIL_PASS'], settings.getint('MAIL_PORT'),
|
||||
settings.getbool('MAIL_TLS'), settings.getbool('MAIL_SSL'))
|
||||
return cls(
|
||||
smtphost=settings['MAIL_HOST'],
|
||||
mailfrom=settings['MAIL_FROM'],
|
||||
smtpuser=settings['MAIL_USER'],
|
||||
smtppass=settings['MAIL_PASS'],
|
||||
smtpport=settings.getint('MAIL_PORT'),
|
||||
smtptls=settings.getbool('MAIL_TLS'),
|
||||
smtpssl=settings.getbool('MAIL_SSL'),
|
||||
)
|
||||
|
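A hedged usage sketch for the factory made explicit above; the MAIL_* settings are the documented ones, and the crawler object is assumed to be in scope (e.g. inside an extension):

    from scrapy.mail import MailSender

    mailer = MailSender.from_settings(crawler.settings)
    mailer.send(
        to=["ops@example.com"],
        subject="Crawl finished",
        body="All done.",
    )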
||||
def send(self, to, subject, body, cc=None, attachs=(), mimetype='text/plain', charset=None, _callback=None):
|
||||
from twisted.internet import reactor
|
||||
@ -89,9 +97,12 @@ class MailSender:
|
||||
return
|
||||
|
||||
dfd = self._sendmail(rcpts, msg.as_string().encode(charset or 'utf-8'))
|
||||
dfd.addCallbacks(self._sent_ok, self._sent_failed,
|
||||
dfd.addCallbacks(
|
||||
callback=self._sent_ok,
|
||||
errback=self._sent_failed,
|
||||
callbackArgs=[to, cc, subject, len(attachs)],
|
||||
errbackArgs=[to, cc, subject, len(attachs)])
|
||||
errbackArgs=[to, cc, subject, len(attachs)],
|
||||
)
|
||||
reactor.addSystemEventTrigger('before', 'shutdown', lambda: dfd)
|
||||
return dfd
|
||||
|
||||
@ -115,9 +126,10 @@ class MailSender:
|
||||
from twisted.mail.smtp import ESMTPSenderFactory
|
||||
msg = BytesIO(msg)
|
||||
d = defer.Deferred()
|
||||
factory = ESMTPSenderFactory(self.smtpuser, self.smtppass, self.mailfrom, \
|
||||
to_addrs, msg, d, heloFallback=True, requireAuthentication=False, \
|
||||
requireTransportSecurity=self.smtptls)
|
||||
factory = ESMTPSenderFactory(
|
||||
self.smtpuser, self.smtppass, self.mailfrom, to_addrs, msg, d,
|
||||
heloFallback=True, requireAuthentication=False, requireTransportSecurity=self.smtptls,
|
||||
)
|
||||
factory.noisy = False
|
||||
|
||||
if self.smtpssl:
|
||||
|
scrapy/pipelines/files.py

@ -10,24 +10,26 @@ import mimetypes
import os
import time
from collections import defaultdict
from email.utils import parsedate_tz, mktime_tz
from contextlib import suppress
from email.utils import mktime_tz, parsedate_tz
from ftplib import FTP
from io import BytesIO
from urllib.parse import urlparse

from itemadapter import ItemAdapter
from twisted.internet import defer, threads

from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import Request
from scrapy.pipelines.media import MediaPipeline
from scrapy.settings import Settings
from scrapy.exceptions import NotConfigured, IgnoreRequest
from scrapy.http import Request
from scrapy.utils.misc import md5sum
from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.python import to_bytes
from scrapy.utils.request import referer_str
from scrapy.utils.boto import is_botocore
from scrapy.utils.datatypes import CaselessDict
from scrapy.utils.ftp import ftp_store_file
from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.misc import md5sum
from scrapy.utils.python import to_bytes
from scrapy.utils.request import referer_str


logger = logging.getLogger(__name__)
@ -83,8 +85,7 @@ class S3FilesStore:
AWS_USE_SSL = None
AWS_VERIFY = None

POLICY = 'private' # Overriden from settings.FILES_STORE_S3_ACL in
# FilesPipeline.from_settings.
POLICY = 'private' # Overriden from settings.FILES_STORE_S3_ACL in FilesPipeline.from_settings
HEADERS = {
'Cache-Control': 'max-age=172800',
}
@ -106,7 +107,8 @@ class S3FilesStore:
else:
from boto.s3.connection import S3Connection
self.S3Connection = S3Connection
assert uri.startswith('s3://')
if not uri.startswith("s3://"):
raise ValueError("Incorrect URI scheme in %s, expected 's3'" % uri)
self.bucket, self.prefix = uri[5:].split('/', 1)

def stat_file(self, path, info):
@ -229,6 +231,20 @@ class GCSFilesStore:
bucket, prefix = uri[5:].split('/', 1)
self.bucket = client.bucket(bucket)
self.prefix = prefix
permissions = self.bucket.test_iam_permissions(
['storage.objects.get', 'storage.objects.create']
)
if 'storage.objects.get' not in permissions:
logger.warning(
"No 'storage.objects.get' permission for GSC bucket %(bucket)s. "
"Checking if files are up to date will be impossible. Files will be downloaded every time.",
{'bucket': bucket}
)
if 'storage.objects.create' not in permissions:
logger.error(
"No 'storage.objects.create' permission for GSC bucket %(bucket)s. Saving files will be impossible!",
{'bucket': bucket}
)

def stat_file(self, path, info):
def _onsuccess(blob):
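The new permission probe above uses the google-cloud-storage IAM API. A hedged, standalone sketch of the same call (bucket and project names are placeholders, and application default credentials are assumed to be configured):

from google.cloud import storage

client = storage.Client(project='example-project')   # placeholder project
bucket = client.bucket('example-bucket')              # placeholder bucket
# test_iam_permissions() returns only the permissions the caller actually holds.
granted = bucket.test_iam_permissions(['storage.objects.get', 'storage.objects.create'])
can_stat = 'storage.objects.get' in granted       # needed to check whether files are up to date
can_store = 'storage.objects.create' in granted   # needed to save new files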
@ -266,7 +282,8 @@ class FTPFilesStore:
USE_ACTIVE_MODE = None

def __init__(self, uri):
assert uri.startswith('ftp://')
if not uri.startswith("ftp://"):
raise ValueError("Incorrect URI scheme in %s, expected 'ftp'" % uri)
u = urlparse(uri)
self.port = u.port
self.host = u.hostname
@ -417,7 +434,7 @@ class FilesPipeline(MediaPipeline):
self.inc_stats(info.spider, 'uptodate')

checksum = result.get('checksum', None)
return {'url': request.url, 'path': path, 'checksum': checksum}
return {'url': request.url, 'path': path, 'checksum': checksum, 'status': 'uptodate'}

path = self.file_path(request, info=info)
dfd = defer.maybeDeferred(self.store.stat_file, path, info)
@ -494,15 +511,16 @@ class FilesPipeline(MediaPipeline):
)
raise FileException(str(exc))

return {'url': request.url, 'path': path, 'checksum': checksum}
return {'url': request.url, 'path': path, 'checksum': checksum, 'status': status}

def inc_stats(self, spider, status):
spider.crawler.stats.inc_value('file_count', spider=spider)
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)

### Overridable Interface
# Overridable Interface
def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.files_urls_field, [])]
urls = ItemAdapter(item).get(self.files_urls_field, [])
return [Request(u) for u in urls]

def file_downloaded(self, response, request, info):
path = self.file_path(request, response=response, info=info)
@ -513,8 +531,8 @@ class FilesPipeline(MediaPipeline):
return checksum

def item_completed(self, results, item, info):
if isinstance(item, dict) or self.files_result_field in item.fields:
item[self.files_result_field] = [x for ok, x in results if ok]
with suppress(KeyError):
ItemAdapter(item)[self.files_result_field] = [x for ok, x in results if ok]
return item

def file_path(self, request, response=None, info=None):
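Both get_media_requests and item_completed above move from direct dict/Item access to itemadapter. A brief hedged sketch of that access pattern (the item contents below are illustrative only):

from itemadapter import ItemAdapter, is_item

item = {'file_urls': ['http://example.com/report.pdf'], 'files': []}  # example item
adapter = ItemAdapter(item)              # wraps dicts, Item subclasses, dataclasses, attrs classes
urls = adapter.get('file_urls', [])      # mirrors the .get() calls in the hunks above
adapter['files'] = [{'url': urls[0], 'path': 'full/report.pdf'}]  # illustrative result entry
assert is_item(item)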
scrapy/pipelines/images.py

@ -5,17 +5,19 @@ See documentation in topics/media-pipeline.rst
"""
import functools
import hashlib
from contextlib import suppress
from io import BytesIO

from itemadapter import ItemAdapter
from PIL import Image

from scrapy.exceptions import DropItem
from scrapy.http import Request
from scrapy.pipelines.files import FileException, FilesPipeline
# TODO: from scrapy.pipelines.media import MediaPipeline
from scrapy.settings import Settings
from scrapy.utils.misc import md5sum
from scrapy.utils.python import to_bytes
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.exceptions import DropItem
#TODO: from scrapy.pipelines.media import MediaPipeline
from scrapy.pipelines.files import FileException, FilesPipeline


class NoimagesDrop(DropItem):
@ -157,11 +159,12 @@ class ImagesPipeline(FilesPipeline):
return image, buf

def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.images_urls_field, [])]
urls = ItemAdapter(item).get(self.images_urls_field, [])
return [Request(u) for u in urls]

def item_completed(self, results, item, info):
if isinstance(item, dict) or self.images_result_field in item.fields:
item[self.images_result_field] = [x for ok, x in results if ok]
with suppress(KeyError):
ItemAdapter(item)[self.images_result_field] = [x for ok, x in results if ok]
return item

def file_path(self, request, response=None, info=None):
scrapy/pipelines/media.py

@ -1,7 +1,7 @@
import functools
import logging
from collections import defaultdict
from twisted.internet.defer import Deferred, DeferredList, _DefGen_Return
from twisted.internet.defer import Deferred, DeferredList
from twisted.python.failure import Failure

from scrapy.settings import Settings
@ -43,8 +43,7 @@ class MediaPipeline:
if allow_redirects:
self.handle_httpstatus_list = SequenceExclude(range(300, 400))

def _key_for_pipe(self, key, base_class_name=None,
settings=None):
def _key_for_pipe(self, key, base_class_name=None, settings=None):
"""
>>> MediaPipeline()._key_for_pipe("IMAGES")
'IMAGES'
@ -55,8 +54,11 @@ class MediaPipeline:
"""
class_name = self.__class__.__name__
formatted_key = "{}_{}".format(class_name.upper(), key)
if class_name == base_class_name or not base_class_name \
or (settings and not settings.get(formatted_key)):
if (
not base_class_name
or class_name == base_class_name
or settings and not settings.get(formatted_key)
):
return key
return formatted_key

@ -141,24 +143,26 @@ class MediaPipeline:
# This code fixes a memory leak by avoiding to keep references to
# the Request and Response objects on the Media Pipeline cache.
#
# Twisted inline callbacks pass return values using the function
# twisted.internet.defer.returnValue, which encapsulates the return
# value inside a _DefGen_Return base exception.
#
# What happens when the media_downloaded callback raises another
# What happens when the media_downloaded callback raises an
# exception, for example a FileException('download-error') when
# the Response status code is not 200 OK, is that it stores the
# _DefGen_Return exception on the FileException context.
# the Response status code is not 200 OK, is that the original
# StopIteration exception (which in turn contains the failed
# Response and by extension, the original Request) gets encapsulated
# within the FileException context.
#
# Originally, Scrapy was using twisted.internet.defer.returnValue
# inside functions decorated with twisted.internet.defer.inlineCallbacks,
# encapsulating the returned Response in a _DefGen_Return exception
# instead of a StopIteration.
#
# To avoid keeping references to the Response and therefore Request
# objects on the Media Pipeline cache, we should wipe the context of
# the exception encapsulated by the Twisted Failure when its a
# _DefGen_Return instance.
# the encapsulated exception when it is a StopIteration instance
#
# This problem does not occur in Python 2.7 since we don't have
# Exception Chaining (https://www.python.org/dev/peps/pep-3134/).
context = getattr(result.value, '__context__', None)
if isinstance(context, _DefGen_Return):
if isinstance(context, StopIteration):
setattr(result.value, '__context__', None)

info.downloading.remove(fp)
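The comment block above describes the reference-wiping pattern in prose. A minimal, hypothetical sketch of the same idea outside the pipeline (the function name is illustrative, not Scrapy API):

from twisted.python.failure import Failure

def wipe_generator_context(failure: Failure) -> Failure:
    # If the wrapped exception was raised while a StopIteration (the generator's
    # return value, which carries the Response) was being handled, drop that
    # chained context so the Response/Request are not kept alive by the cache.
    context = getattr(failure.value, '__context__', None)
    if isinstance(context, StopIteration):
        failure.value.__context__ = None
    return failure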
@ -166,7 +170,7 @@ class MediaPipeline:
for wad in info.waiting.pop(fp):
defer_result(result).chainDeferred(wad)

### Overridable Interface
# Overridable Interface
def media_to_download(self, request, info):
"""Check request before starting download"""
pass
scrapy/responsetypes.py

@ -58,9 +58,9 @@ class ResponseTypes:

def from_content_disposition(self, content_disposition):
try:
filename = to_unicode(content_disposition,
encoding='latin-1', errors='replace').split(';')[1].split('=')[1]
filename = filename.strip('"\'')
filename = to_unicode(
content_disposition, encoding='latin-1', errors='replace'
).split(';')[1].split('=')[1].strip('"\'')
return self.from_filename(filename)
except IndexError:
return Response
@ -71,7 +71,7 @@ class ResponseTypes:
cls = Response
if b'Content-Type' in headers:
cls = self.from_content_type(
content_type=headers[b'Content-type'],
content_type=headers[b'Content-Type'],
content_encoding=headers.get(b'Content-Encoding')
)
if cls is Response and b'Content-Disposition' in headers:
scrapy/robotstxt.py

@ -17,10 +17,12 @@ def decode_robotstxt(robotstxt_body, spider, to_native_str_type=False):
except UnicodeDecodeError:
# If we found garbage or robots.txt in an encoding other than UTF-8, disregard it.
# Switch to 'allow all' state.
logger.warning("Failure while parsing robots.txt. "
"File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.",
logger.warning(
"Failure while parsing robots.txt. File either contains garbage or "
"is in an encoding other than UTF-8, treating it as an empty file.",
exc_info=sys.exc_info(),
extra={'spider': spider})
extra={'spider': spider},
)
robotstxt_body = ''
return robotstxt_body
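The hunk above only reflows the warning; the behaviour it describes is the UTF-8 fallback. A hedged sketch of that fallback in isolation (the helper name is illustrative, not part of Scrapy's API):

def decode_or_allow_all(robotstxt_body: bytes) -> str:
    # A robots.txt body that is not valid UTF-8 is treated as empty,
    # i.e. no rules at all, which amounts to an "allow all" state.
    try:
        return robotstxt_body.decode('utf-8')
    except UnicodeDecodeError:
        return ''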
scrapy/selector/__init__.py

@ -1,4 +1,6 @@
"""
Selectors
"""
from scrapy.selector.unified import * # noqa: F401

# top-level imports
from scrapy.selector.unified import Selector, SelectorList
scrapy/selector/unified.py

@ -65,7 +65,7 @@ class Selector(_ParselSelector, object_ref):
selectorlist_cls = SelectorList

def __init__(self, response=None, text=None, type=None, root=None, **kwargs):
if not(response is None or text is None):
if response is not None and text is not None:
raise ValueError('%s.__init__() received both response and text'
% self.__class__.__name__)
scrapy/shell.py

@ -6,6 +6,7 @@ See documentation in docs/topics/shell.rst
import os
import signal

from itemadapter import is_item
from twisted.internet import threads, defer
from twisted.python import threadable
from w3lib.url import any_to_uri
@ -13,21 +14,18 @@ from w3lib.url import any_to_uri
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest
from scrapy.http import Request, Response
from scrapy.item import BaseItem
from scrapy.settings import Settings
from scrapy.spiders import Spider
from scrapy.utils.console import start_python_console
from scrapy.utils.conf import get_config
from scrapy.utils.console import DEFAULT_PYTHON_SHELLS, start_python_console
from scrapy.utils.datatypes import SequenceExclude
from scrapy.utils.misc import load_object
from scrapy.utils.response import open_in_browser
from scrapy.utils.conf import get_config
from scrapy.utils.console import DEFAULT_PYTHON_SHELLS


class Shell:

relevant_classes = (Crawler, Spider, Request, Response, BaseItem,
Settings)
relevant_classes = (Crawler, Spider, Request, Response, Settings)

def __init__(self, crawler, update_vars=None, code=None):
self.crawler = crawler
@ -146,17 +144,16 @@ class Shell:
b.append("Useful shortcuts:")
if self.inthread:
b.append(" fetch(url[, redirect=True]) "
"Fetch URL and update local objects "
"(by default, redirects are followed)")
"Fetch URL and update local objects (by default, redirects are followed)")
b.append(" fetch(req) "
"Fetch a scrapy.Request and update local objects ")
b.append(" shelp() Shell help (print this help)")
b.append(" view(response) View response in a browser")

return "\n".join("[s] %s" % l for l in b)
return "\n".join("[s] %s" % line for line in b)

def _is_relevant(self, value):
return isinstance(value, self.relevant_classes)
return isinstance(value, self.relevant_classes) or is_item(value)

def inspect_response(response, spider):
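The shell now treats anything itemadapter recognises as relevant, via is_item() instead of a BaseItem isinstance check. A quick hedged illustration of that helper:

from itemadapter import is_item

print(is_item({'title': 'example'}))   # True: plain dicts count as items
print(is_item(object()))               # False: arbitrary objects do not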
scrapy/signals.py

@ -17,6 +17,7 @@ request_reached_downloader = object()
request_left_downloader = object()
response_received = object()
response_downloaded = object()
bytes_received = object()
item_scraped = object()
item_dropped = object()
item_error = object()
scrapy/spiderloader.py

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import traceback
import warnings
from collections import defaultdict

from zope.interface import implementer

@ -16,6 +15,7 @@ class SpiderLoader:
SpiderLoader is a class which locates and loads spiders
in a Scrapy project.
"""

def __init__(self, settings):
self.spider_modules = settings.getlist('SPIDER_MODULES')
self.warn_only = settings.getbool('SPIDER_LOADER_WARN_ONLY')
@ -24,16 +24,21 @@ class SpiderLoader:
self._load_all_spiders()

def _check_name_duplicates(self):
dupes = ["\n".join(" {cls} named {name!r} (in {module})".format(
module=mod, cls=cls, name=name)
for (mod, cls) in locations)
for name, locations in self._found.items()
if len(locations) > 1]
dupes = []
for name, locations in self._found.items():
dupes.extend([
" {cls} named {name!r} (in {module})".format(module=mod, cls=cls, name=name)
for mod, cls in locations
if len(locations) > 1
])

if dupes:
msg = ("There are several spiders with the same name:\n\n"
"{}\n\n This can cause unexpected behavior.".format(
"\n\n".join(dupes)))
warnings.warn(msg, UserWarning)
dupes_string = "\n\n".join(dupes)
warnings.warn(
"There are several spiders with the same name:\n\n"
"{}\n\n This can cause unexpected behavior.".format(dupes_string),
category=UserWarning,
)

def _load_spiders(self, module):
for spcls in iter_spider_classes(module):
@ -45,12 +50,15 @@ class SpiderLoader:
try:
for module in walk_modules(name):
self._load_spiders(module)
except ImportError as e:
except ImportError:
if self.warn_only:
msg = ("\n{tb}Could not load spiders from module '{modname}'. "
warnings.warn(
"\n{tb}Could not load spiders from module '{modname}'. "
"See above traceback for details.".format(
modname=name, tb=traceback.format_exc()))
warnings.warn(msg, RuntimeWarning)
modname=name, tb=traceback.format_exc()
),
category=RuntimeWarning,
)
else:
raise
self._check_name_duplicates()
@ -73,8 +81,10 @@ class SpiderLoader:
"""
Return the list of spider names that can handle the given request.
"""
return [name for name, cls in self._spiders.items()
if cls.handles_request(request)]
return [
name for name, cls in self._spiders.items()
if cls.handles_request(request)
]

def list(self):
"""
scrapy/spidermiddlewares/referer.py

@ -163,9 +163,10 @@ class StrictOriginPolicy(ReferrerPolicy):
name = POLICY_STRICT_ORIGIN

def referrer(self, response_url, request_url):
if ((self.tls_protected(response_url) and
self.potentially_trustworthy(request_url))
or not self.tls_protected(response_url)):
if (
self.tls_protected(response_url) and self.potentially_trustworthy(request_url)
or not self.tls_protected(response_url)
):
return self.origin_referrer(response_url)


@ -213,9 +214,10 @@ class StrictOriginWhenCrossOriginPolicy(ReferrerPolicy):
origin = self.origin(response_url)
if origin == self.origin(request_url):
return self.stripped_referrer(response_url)
elif ((self.tls_protected(response_url) and
self.potentially_trustworthy(request_url))
or not self.tls_protected(response_url)):
elif (
self.tls_protected(response_url) and self.potentially_trustworthy(request_url)
or not self.tls_protected(response_url)
):
return self.origin_referrer(response_url)
scrapy/spiders/__init__.py

@ -110,6 +110,6 @@ class Spider(object_ref):

# Top-level imports
from scrapy.spiders.crawl import CrawlSpider, Rule # noqa: F401
from scrapy.spiders.feed import XMLFeedSpider, CSVFeedSpider # noqa: F401
from scrapy.spiders.sitemap import SitemapSpider # noqa: F401
from scrapy.spiders.crawl import CrawlSpider, Rule
from scrapy.spiders.feed import XMLFeedSpider, CSVFeedSpider
from scrapy.spiders.sitemap import SitemapSpider
Some files were not shown because too many files have changed in this diff.