
Merge branch 'master' into response_ip_address

Eugenio Lacuesta 2020-03-03 13:53:43 -03:00
commit 3aa5eab993
50 changed files with 934 additions and 133 deletions

View File

@ -1,8 +1,7 @@
[bumpversion]
current_version = 1.8.0
current_version = 2.0.0
commit = True
tag = True
tag_name = {new_version}
[bumpversion:file:scrapy/VERSION]

View File

@ -1,9 +1,11 @@
version: 2
sphinx:
configuration: docs/conf.py
fail_on_warning: true
python:
# For available versions, see:
# https://docs.readthedocs.io/en/stable/config-file/v2.html#build-image
version: 3.7 # Keep in sync with .travis.yml
install:
- requirements: docs/requirements.txt
- path: .

View File

@ -41,7 +41,7 @@ Requirements
============
* Python 3.5+
* Works on Linux, Windows, Mac OSX, BSD
* Works on Linux, Windows, macOS, BSD
Install
=======

View File

@ -281,6 +281,7 @@ coverage_ignore_pyobjects = [
intersphinx_mapping = {
'coverage': ('https://coverage.readthedocs.io/en/stable', None),
'cssselect': ('https://cssselect.readthedocs.io/en/latest', None),
'pytest': ('https://docs.pytest.org/en/latest', None),
'python': ('https://docs.python.org/3', None),
'sphinx': ('https://www.sphinx-doc.org/en/master', None),

View File

@ -143,7 +143,7 @@ by running ``git fetch upstream pull/$PR_NUMBER/head:$BRANCH_NAME_TO_CREATE``
(replace 'upstream' with a remote name for scrapy repository,
``$PR_NUMBER`` with an ID of the pull request, and ``$BRANCH_NAME_TO_CREATE``
with a name of the branch you want to create locally).
See also: https://help.github.com/articles/checking-out-pull-requests-locally/#modifying-an-inactive-pull-request-locally.
See also: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/checking-out-pull-requests-locally#modifying-an-inactive-pull-request-locally.
When writing GitHub pull requests, try to keep titles short but descriptive.
E.g. For bug #411: "Scrapy hangs if an exception raises in start_requests"
@ -168,7 +168,7 @@ Scrapy:
* Don't put your name in the code you contribute; git provides enough
metadata to identify author of the code.
See https://help.github.com/articles/setting-your-username-in-git/ for
See https://help.github.com/en/github/using-git/setting-your-username-in-git for
setup instructions.
.. _documentation-policies:
@ -266,5 +266,5 @@ And their unit-tests are in::
.. _tests/: https://github.com/scrapy/scrapy/tree/master/tests
.. _open issues: https://github.com/scrapy/scrapy/issues
.. _PEP 257: https://www.python.org/dev/peps/pep-0257/
.. _pull request: https://help.github.com/en/articles/creating-a-pull-request
.. _pull request: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request
.. _pytest-xdist: https://github.com/pytest-dev/pytest-xdist

View File

@ -22,8 +22,8 @@ In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like
comparing `jinja2`_ to `Django`_.
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/
.. _lxml: http://lxml.de/
.. _jinja2: http://jinja.pocoo.org/
.. _lxml: https://lxml.de/
.. _jinja2: https://palletsprojects.com/p/jinja/
.. _Django: https://www.djangoproject.com/
Can I use Scrapy with BeautifulSoup?
@ -269,7 +269,7 @@ The ``__VIEWSTATE`` parameter is used in sites built with ASP.NET/VB.NET. For
more info on how it works see `this page`_. Also, here's an `example spider`_
which scrapes one of these sites.
.. _this page: http://search.cpan.org/~ecarroll/HTML-TreeBuilderX-ASP_NET-0.09/lib/HTML/TreeBuilderX/ASP_NET.pm
.. _this page: https://metacpan.org/pod/release/ECARROLL/HTML-TreeBuilderX-ASP_NET-0.09/lib/HTML/TreeBuilderX/ASP_NET.pm
.. _example spider: https://github.com/AmbientLighter/rpn-fas/blob/master/fas/spiders/rnp.py
What's the best way to parse big XML/CSV data feeds?

View File

@ -165,6 +165,8 @@ Solving specific problems
topics/autothrottle
topics/benchmarking
topics/jobs
topics/coroutines
topics/asyncio
:doc:`faq`
Get answers to most frequently asked questions.
@ -205,6 +207,12 @@ Solving specific problems
:doc:`topics/jobs`
Learn how to pause and resume crawls for large spiders.
:doc:`topics/coroutines`
Use the :ref:`coroutine syntax <async>`.
:doc:`topics/asyncio`
Use :mod:`asyncio` and :mod:`asyncio`-powered libraries.
.. _extending-scrapy:
Extending Scrapy

View File

@ -7,12 +7,12 @@ Installation guide
Installing Scrapy
=================
Scrapy runs on Python 3.5 or above
under CPython (default Python implementation) and PyPy (starting with PyPy 5.9).
Scrapy runs on Python 3.5 or above under CPython (default Python
implementation) and PyPy (starting with PyPy 5.9).
If you're using `Anaconda`_ or `Miniconda`_, you can install the package from
the `conda-forge`_ channel, which has up-to-date packages for Linux, Windows
and OS X.
and macOS.
To install Scrapy using ``conda``, run::
@ -65,7 +65,7 @@ please refer to their respective installation instructions:
* `lxml installation`_
* `cryptography installation`_
.. _lxml installation: http://lxml.de/installation.html
.. _lxml installation: https://lxml.de/installation.html
.. _cryptography installation: https://cryptography.io/en/latest/installation/
@ -148,11 +148,11 @@ you can install Scrapy with ``pip`` after that::
.. _intro-install-macos:
Mac OS X
--------
macOS
-----
Building Scrapy's dependencies requires the presence of a C compiler and
development headers. On OS X this is typically provided by Apples Xcode
development headers. On macOS this is typically provided by Apple's Xcode
development tools. To install the Xcode command line tools open a terminal
window and run::
@ -191,7 +191,7 @@ solutions:
* *(Optional)* :ref:`Install Scrapy inside a Python virtual environment
<intro-using-virtualenv>`.
This method is a workaround for the above OS X issue, but it's an overall
This method is a workaround for the above macOS issue, but it's an overall
good practice for managing dependencies and can complement the first method.
After any of these workarounds you should be able to install Scrapy::
@ -207,7 +207,7 @@ For PyPy3, only Linux installation was tested.
Most Scrapy dependencies now have binary wheels for CPython, but not for PyPy.
This means that these dependencies will be built during installation.
On OS X, you are likely to face an issue with building Cryptography dependency,
On macOS, you are likely to face an issue with building the Cryptography dependency;
the solution to this problem is described
`here <https://github.com/pyca/cryptography/issues/2692#issuecomment-272773481>`_,
that is to ``brew install openssl`` and then export the flags that this command
@ -253,11 +253,11 @@ For details, see `Issue #2473 <https://github.com/scrapy/scrapy/issues/2473>`_.
.. _Python: https://www.python.org/
.. _pip: https://pip.pypa.io/en/latest/installing/
.. _lxml: https://lxml.de/index.html
.. _parsel: https://pypi.python.org/pypi/parsel
.. _w3lib: https://pypi.python.org/pypi/w3lib
.. _twisted: https://twistedmatrix.com/
.. _cryptography: https://cryptography.io/
.. _pyOpenSSL: https://pypi.python.org/pypi/pyOpenSSL
.. _parsel: https://pypi.org/project/parsel/
.. _w3lib: https://pypi.org/project/w3lib/
.. _twisted: https://twistedmatrix.com/trac/
.. _cryptography: https://cryptography.io/en/latest/
.. _pyOpenSSL: https://pypi.org/project/pyOpenSSL/
.. _setuptools: https://pypi.python.org/pypi/setuptools
.. _AUR Scrapy package: https://aur.archlinux.org/packages/scrapy/
.. _homebrew: https://brew.sh/

View File

@ -306,7 +306,7 @@ with a selector (see :ref:`topics-developer-tools`).
visually selected elements, which works in many browsers.
.. _regular expressions: https://docs.python.org/3/library/re.html
.. _Selector Gadget: http://selectorgadget.com/
.. _Selector Gadget: https://selectorgadget.com/
XPath: a brief intro
@ -337,7 +337,7 @@ recommend `this tutorial to learn XPath through examples
<http://zvon.org/comp/r/tut-XPath_1.html>`_, and `this tutorial to learn "how
to think in XPath" <http://plasmasturm.org/log/xpath101/>`_.
.. _XPath: https://www.w3.org/TR/xpath
.. _XPath: https://www.w3.org/TR/xpath/all/
.. _CSS: https://www.w3.org/TR/selectors
Extracting quotes and authors

View File

@ -3,8 +3,452 @@
Release notes
=============
.. note:: Scrapy 1.x will be the last series supporting Python 2. Scrapy 2.0,
planned for Q4 2019 or Q1 2020, will support **Python 3 only**.
.. _release-2.0.0:
Scrapy 2.0.0 (2020-03-03)
-------------------------
Highlights:
* Python 2 support has been removed
* :doc:`Partial <topics/coroutines>` :ref:`coroutine syntax <async>` support
and :doc:`experimental <topics/asyncio>` :mod:`asyncio` support
* New :meth:`Response.follow_all <scrapy.http.Response.follow_all>` method
* :ref:`FTP support <media-pipeline-ftp>` for media pipelines
* New :attr:`Response.certificate <scrapy.http.Response.certificate>`
attribute
* IPv6 support through :setting:`DNS_RESOLVER`
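A minimal sketch combining two of these highlights, the coroutine syntax and
:meth:`Response.follow_all <scrapy.http.Response.follow_all>`; the spider,
selectors and URLs below are hypothetical::

    import scrapy

    class BooksSpider(scrapy.Spider):
        # Hypothetical spider, used only to illustrate the highlights above
        name = 'books'
        start_urls = ['http://books.toscrape.com/']

        async def parse(self, response):
            # Partial coroutine support: callbacks may be ``async def``
            # (using ``yield`` inside them requires Python 3.6+)
            for book in response.css('article.product_pod'):
                yield {'title': book.css('h3 a::attr(title)').get()}
            # follow_all() accepts an iterable of URLs (or, on text responses,
            # a css/xpath expression) and returns an iterable of requests
            for request in response.follow_all(css='li.next a', callback=self.parse):
                yield request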
Backward-incompatible changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Python 2 support has been removed, following `Python 2 end-of-life on
January 1, 2020`_ (:issue:`4091`, :issue:`4114`, :issue:`4115`,
:issue:`4121`, :issue:`4138`, :issue:`4231`, :issue:`4242`, :issue:`4304`,
:issue:`4309`, :issue:`4373`)
* Retry gaveups (see :setting:`RETRY_TIMES`) are now logged as errors instead
of as debug information (:issue:`3171`, :issue:`3566`)
* File extensions that
:class:`LinkExtractor <scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor>`
ignores by default now also include ``7z``, ``7zip``, ``apk``, ``bz2``,
``cdr``, ``dmg``, ``ico``, ``iso``, ``tar``, ``tar.gz``, ``webm``, and
``xz`` (:issue:`1837`, :issue:`2067`, :issue:`4066`)
* The :setting:`METAREFRESH_IGNORE_TAGS` setting is now an empty list by
default, following web browser behavior (:issue:`3844`, :issue:`4311`)
* The
:class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`
now includes spaces after commas in the value of the ``Accept-Encoding``
header that it sets, following web browser behavior (:issue:`4293`)
* The ``__init__`` method of custom download handlers (see
:setting:`DOWNLOAD_HANDLERS`) or subclasses of the following downloader
handlers no longer receives a ``settings`` parameter:
* :class:`scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler`
* :class:`scrapy.core.downloader.handlers.file.FileDownloadHandler`
Use the ``from_settings`` or ``from_crawler`` class methods to expose such
a parameter to your custom download handlers.
(:issue:`4126`)
* We have refactored the :class:`scrapy.core.scheduler.Scheduler` class and
related queue classes (see :setting:`SCHEDULER_PRIORITY_QUEUE`,
:setting:`SCHEDULER_DISK_QUEUE` and :setting:`SCHEDULER_MEMORY_QUEUE`) to
make it easier to implement custom scheduler queue classes. See
:ref:`2-0-0-scheduler-queue-changes` below for details.
* Overridden settings are now logged in a different format. This is more in
line with similar information logged at startup (:issue:`4199`)
.. _Python 2 end-of-life on January 1, 2020: https://www.python.org/doc/sunset-python-2/
Deprecation removals
~~~~~~~~~~~~~~~~~~~~
* The :ref:`Scrapy shell <topics-shell>` no longer provides a ``sel`` proxy
object; use :meth:`response.selector <scrapy.http.Response.selector>`
instead (:issue:`4347`)
* LevelDB support has been removed (:issue:`4112`)
* The following functions have been removed from :mod:`scrapy.utils.python`:
``isbinarytext``, ``is_writable``, ``setattr_default``, ``stringify_dict``
(:issue:`4362`)
Deprecations
~~~~~~~~~~~~
* Using environment variables prefixed with ``SCRAPY_`` to override settings
is deprecated (:issue:`4300`, :issue:`4374`, :issue:`4375`)
* :class:`scrapy.linkextractors.FilteringLinkExtractor` is deprecated, use
:class:`scrapy.linkextractors.LinkExtractor
<scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor>` instead (:issue:`4045`)
* The ``noconnect`` query string argument of proxy URLs is deprecated and
should be removed from proxy URLs (:issue:`4198`)
* The :meth:`next <scrapy.utils.python.MutableChain.next>` method of
:class:`scrapy.utils.python.MutableChain` is deprecated, use the global
:func:`next` function or :meth:`MutableChain.__next__
<scrapy.utils.python.MutableChain.__next__>` instead (:issue:`4153`)
New features
~~~~~~~~~~~~
* Added :doc:`partial support <topics/coroutines>` for Python's
:ref:`coroutine syntax <async>` and :doc:`experimental support
<topics/asyncio>` for :mod:`asyncio` and :mod:`asyncio`-powered libraries
(:issue:`4010`, :issue:`4259`, :issue:`4269`, :issue:`4270`, :issue:`4271`,
:issue:`4316`, :issue:`4318`)
* The new :meth:`Response.follow_all <scrapy.http.Response.follow_all>`
method offers the same functionality as
:meth:`Response.follow <scrapy.http.Response.follow>` but supports an
iterable of URLs as input and returns an iterable of requests
(:issue:`2582`, :issue:`4057`, :issue:`4286`)
* :ref:`Media pipelines <topics-media-pipeline>` now support :ref:`FTP
storage <media-pipeline-ftp>` (:issue:`3928`, :issue:`3961`)
* The new :attr:`Response.certificate <scrapy.http.Response.certificate>`
attribute exposes the SSL certificate of the server as a
:class:`twisted.internet.ssl.Certificate` object for HTTPS responses
(:issue:`2726`, :issue:`4054`)
* A new :setting:`DNS_RESOLVER` setting allows enabling IPv6 support
(:issue:`1031`, :issue:`4227`)
* A new :setting:`SCRAPER_SLOT_MAX_ACTIVE_SIZE` setting allows configuring
the existing soft limit that pauses request downloads when the total
response data being processed is too high (:issue:`1410`, :issue:`3551`)
* A new :setting:`TWISTED_REACTOR` setting allows customizing the
:mod:`~twisted.internet.reactor` that Scrapy uses, making it possible to
:doc:`enable asyncio support <topics/asyncio>` or deal with a
:ref:`common macOS issue <faq-specific-reactor>` (:issue:`2905`,
:issue:`4294`)
* Scheduler disk and memory queues may now use the class methods
``from_crawler`` or ``from_settings`` (:issue:`3884`)
* The new :attr:`Response.cb_kwargs <scrapy.http.Response.cb_kwargs>`
attribute serves as a shortcut for :attr:`Response.request.cb_kwargs
<scrapy.http.Request.cb_kwargs>` (:issue:`4331`)
* :meth:`Response.follow <scrapy.http.Response.follow>` now supports a
``flags`` parameter, for consistency with :class:`~scrapy.http.Request`
(:issue:`4277`, :issue:`4279`)
* :ref:`Item loader processors <topics-loaders-processors>` can now be
regular functions; they no longer need to be methods (:issue:`3899`)
* :class:`~scrapy.spiders.Rule` now accepts an ``errback`` parameter
(:issue:`4000`)
* :class:`~scrapy.http.Request` no longer requires a ``callback`` parameter
when an ``errback`` parameter is specified (:issue:`3586`, :issue:`4008`)
* :class:`~scrapy.logformatter.LogFormatter` now supports some additional
methods:
* :class:`~scrapy.logformatter.LogFormatter.download_error` for
download errors
* :class:`~scrapy.logformatter.LogFormatter.item_error` for exceptions
raised during item processing by :ref:`item pipelines
<topics-item-pipeline>`
* :class:`~scrapy.logformatter.LogFormatter.spider_error` for exceptions
raised from :ref:`spider callbacks <topics-spiders>`
(:issue:`374`, :issue:`3986`, :issue:`3989`, :issue:`4176`, :issue:`4188`)
* The :setting:`FEED_URI` setting now supports :class:`pathlib.Path` values
(:issue:`3731`, :issue:`4074`)
* A new :signal:`request_left_downloader` signal is sent when a request
leaves the downloader (:issue:`4303`)
* Scrapy logs a warning when it detects a request callback or errback that
uses ``yield`` but also returns a value, since the returned value would be
lost (:issue:`3484`, :issue:`3869`)
* :class:`~scrapy.spiders.Spider` objects now raise an :exc:`AttributeError`
exception if they do not have a :class:`~scrapy.spiders.Spider.start_urls`
attribute nor reimplement :class:`~scrapy.spiders.Spider.start_requests`,
but have a ``start_url`` attribute (:issue:`4133`, :issue:`4170`)
* :class:`~scrapy.exporters.BaseItemExporter` subclasses may now use
``super().__init__(**kwargs)`` instead of ``self._configure(kwargs)`` in
their ``__init__`` method, passing ``dont_fail=True`` to the parent
``__init__`` method if needed, and accessing ``kwargs`` at ``self._kwargs``
after calling their parent ``__init__`` method (:issue:`4193`,
:issue:`4370`)
* A new ``keep_fragments`` parameter of
:func:`scrapy.utils.request.request_fingerprint` allows generating
different fingerprints for requests with different fragments in their URL
(:issue:`4104`); see the sketch after this list
* Download handlers (see :setting:`DOWNLOAD_HANDLERS`) may now use the
``from_settings`` and ``from_crawler`` class methods that other Scrapy
components already supported (:issue:`4126`)
* :class:`scrapy.utils.python.MutableChain.__iter__` now returns ``self``,
`allowing it to be used as a sequence <https://lgtm.com/rules/4850080/>`_
(:issue:`4153`)
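As a small, hedged illustration of the new ``keep_fragments`` parameter
mentioned above (the URLs are made up)::

    from scrapy import Request
    from scrapy.utils.request import request_fingerprint

    a = Request('https://example.com/page#section-1')
    b = Request('https://example.com/page#section-2')

    # By default fragments are stripped, so both requests share a fingerprint
    assert request_fingerprint(a) == request_fingerprint(b)

    # With keep_fragments=True (new in 2.0) the fragments are taken into account
    assert request_fingerprint(a, keep_fragments=True) != request_fingerprint(b, keep_fragments=True)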
Bug fixes
~~~~~~~~~
* The :command:`crawl` command now also exits with exit code 1 when an
exception happens before the crawling starts (:issue:`4175`, :issue:`4207`)
* :class:`LinkExtractor.extract_links
<scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor.extract_links>` no longer
re-encodes the query string or URLs from non-UTF-8 responses in UTF-8
(:issue:`998`, :issue:`1403`, :issue:`1949`, :issue:`4321`)
* The first spider middleware (see :setting:`SPIDER_MIDDLEWARES`) now also
processes exceptions raised from callbacks that are generators
(:issue:`4260`, :issue:`4272`)
* Redirects to URLs starting with 3 slashes (``///``) are now supported
(:issue:`4032`, :issue:`4042`)
* :class:`~scrapy.http.Request` no longer accepts strings as ``url`` simply
because they have a colon (:issue:`2552`, :issue:`4094`)
* The correct encoding is now used for attachment names in
:class:`~scrapy.mail.MailSender` (:issue:`4229`, :issue:`4239`)
* :class:`~scrapy.dupefilters.RFPDupeFilter`, the default
:setting:`DUPEFILTER_CLASS`, no longer writes an extra ``\r`` character on
each line in Windows, which made the size of the ``requests.seen`` file
unnecessarily large on that platform (:issue:`4283`)
* Z shell auto-completion now looks for ``.html`` files, not ``.http`` files,
and covers the ``-h`` command-line switch (:issue:`4122`, :issue:`4291`)
* Adding items to a :class:`scrapy.utils.datatypes.LocalCache` object
without a ``limit`` defined no longer raises a :exc:`TypeError` exception
(:issue:`4123`)
* Fixed a typo in the message of the :exc:`ValueError` exception raised when
:func:`scrapy.utils.misc.create_instance` gets both ``settings`` and
``crawler`` set to ``None`` (:issue:`4128`)
Documentation
~~~~~~~~~~~~~
* API documentation now links to an online, syntax-highlighted view of the
corresponding source code (:issue:`4148`)
* Links to non-existent documentation pages now allow access to the sidebar
(:issue:`4152`, :issue:`4169`)
* Cross-references within our documentation now display a tooltip when
hovered (:issue:`4173`, :issue:`4183`)
* Improved the documentation about :meth:`LinkExtractor.extract_links
<scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor.extract_links>` and
simplified :ref:`topics-link-extractors` (:issue:`4045`)
* Clarified how :class:`ItemLoader.item <scrapy.loader.ItemLoader.item>`
works (:issue:`3574`, :issue:`4099`)
* Clarified that :func:`logging.basicConfig` should not be used when also
using :class:`~scrapy.crawler.CrawlerProcess` (:issue:`2149`,
:issue:`2352`, :issue:`3146`, :issue:`3960`)
* Clarified the requirements for :class:`~scrapy.http.Request` objects
:ref:`when using persistence <request-serialization>` (:issue:`4124`,
:issue:`4139`)
* Clarified how to install a :ref:`custom image pipeline
<media-pipeline-example>` (:issue:`4034`, :issue:`4252`)
* Fixed the signatures of the ``file_path`` method in :ref:`media pipeline
<topics-media-pipeline>` examples (:issue:`4290`)
* Covered a backward-incompatible change in Scrapy 1.7.0 affecting custom
:class:`scrapy.core.scheduler.Scheduler` subclasses (:issue:`4274`)
* Improved the ``README.rst`` and ``CODE_OF_CONDUCT.md`` files
(:issue:`4059`)
* Documentation examples are now checked as part of our test suite and we
have fixed some of the issues detected (:issue:`4142`, :issue:`4146`,
:issue:`4171`, :issue:`4184`, :issue:`4190`)
* Fixed logic issues, broken links and typos (:issue:`4247`, :issue:`4258`,
:issue:`4282`, :issue:`4288`, :issue:`4305`, :issue:`4308`, :issue:`4323`,
:issue:`4338`, :issue:`4359`, :issue:`4361`)
* Improved consistency when referring to the ``__init__`` method of an object
(:issue:`4086`, :issue:`4088`)
* Fixed an inconsistency between code and output in :ref:`intro-overview`
(:issue:`4213`)
* Extended :mod:`~sphinx.ext.intersphinx` usage (:issue:`4147`,
:issue:`4172`, :issue:`4185`, :issue:`4194`, :issue:`4197`)
* We now use a recent version of Python to build the documentation
(:issue:`4140`, :issue:`4249`)
* Cleaned up documentation (:issue:`4143`, :issue:`4275`)
Quality assurance
~~~~~~~~~~~~~~~~~
* Re-enabled proxy ``CONNECT`` tests (:issue:`2545`, :issue:`4114`)
* Added Bandit_ security checks to our test suite (:issue:`4162`,
:issue:`4181`)
* Added Flake8_ style checks to our test suite and applied many of the
corresponding changes (:issue:`3944`, :issue:`3945`, :issue:`4137`,
:issue:`4157`, :issue:`4167`, :issue:`4174`, :issue:`4186`, :issue:`4195`,
:issue:`4238`, :issue:`4246`, :issue:`4355`, :issue:`4360`, :issue:`4365`)
* Improved test coverage (:issue:`4097`, :issue:`4218`, :issue:`4236`)
* Started reporting slowest tests, and improved the performance of some of
them (:issue:`4163`, :issue:`4164`)
* Fixed broken tests and refactored some tests (:issue:`4014`, :issue:`4095`,
:issue:`4244`, :issue:`4268`, :issue:`4372`)
* Modified the :doc:`tox <tox:index>` configuration to allow running tests
with any Python version, run Bandit_ and Flake8_ tests by default, and
enforce a minimum tox version programmatically (:issue:`4179`)
* Cleaned up code (:issue:`3937`, :issue:`4208`, :issue:`4209`,
:issue:`4210`, :issue:`4212`, :issue:`4369`, :issue:`4376`, :issue:`4378`)
.. _Bandit: https://bandit.readthedocs.io/
.. _Flake8: https://flake8.pycqa.org/en/latest/
.. _2-0-0-scheduler-queue-changes:
Changes to scheduler queue classes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The following changes may impact any custom queue classes of all types:
* The ``push`` method no longer receives a second positional parameter
containing ``request.priority * -1``. If you need that value, get it
from the first positional parameter, ``request``, instead, or use
the new :meth:`~scrapy.core.scheduler.ScrapyPriorityQueue.priority`
method in :class:`scrapy.core.scheduler.ScrapyPriorityQueue`
subclasses.
The following changes may impact custom priority queue classes:
* In the ``__init__`` method or the ``from_crawler`` or ``from_settings``
class methods:
* The parameter that used to contain a factory function,
``qfactory``, is now passed as a keyword parameter named
``downstream_queue_cls``.
* A new keyword parameter has been added: ``key``. It is a string
that is always an empty string for memory queues and indicates the
:setting:`JOB_DIR` value for disk queues.
* The parameter for disk queues that contains data from the previous
crawl, ``startprios`` or ``slot_startprios``, is now passed as a
keyword parameter named ``startprios``.
* The ``serialize`` parameter is no longer passed. The disk queue
class must take care of request serialization on its own before
writing to disk, using the
:func:`~scrapy.utils.reqser.request_to_dict` and
:func:`~scrapy.utils.reqser.request_from_dict` functions from the
:mod:`scrapy.utils.reqser` module.
The following changes may impact custom disk and memory queue classes:
* The signature of the ``__init__`` method is now
``__init__(self, crawler, key)``.
The following changes affect specifically the
:class:`~scrapy.core.scheduler.ScrapyPriorityQueue` and
:class:`~scrapy.core.scheduler.DownloaderAwarePriorityQueue` classes from
:mod:`scrapy.core.scheduler` and may affect subclasses:
* In the ``__init__`` method, most of the changes described above apply.
``__init__`` may still receive all parameters as positional parameters,
however:
* ``downstream_queue_cls``, which replaced ``qfactory``, must be
instantiated differently.
``qfactory`` was instantiated with a priority value (integer).
Instances of ``downstream_queue_cls`` should be created using
the new
:meth:`ScrapyPriorityQueue.qfactory <scrapy.core.scheduler.ScrapyPriorityQueue.qfactory>`
or
:meth:`DownloaderAwarePriorityQueue.pqfactory <scrapy.core.scheduler.DownloaderAwarePriorityQueue.pqfactory>`
methods.
* The new ``key`` parameter displaced the ``startprios``
parameter 1 position to the right.
* The following class attributes have been added:
* :attr:`~scrapy.core.scheduler.ScrapyPriorityQueue.crawler`
* :attr:`~scrapy.core.scheduler.ScrapyPriorityQueue.downstream_queue_cls`
(details above)
* :attr:`~scrapy.core.scheduler.ScrapyPriorityQueue.key` (details above)
* The ``serialize`` attribute has been removed (details above)
The following changes affect specifically the
:class:`~scrapy.core.scheduler.ScrapyPriorityQueue` class and may affect
subclasses:
* A new :meth:`~scrapy.core.scheduler.ScrapyPriorityQueue.priority`
method has been added which, given a request, returns
``request.priority * -1``.
It is used in :meth:`~scrapy.core.scheduler.ScrapyPriorityQueue.push`
to make up for the removal of its ``priority`` parameter.
* The ``spider`` attribute has been removed. Use
:attr:`crawler.spider <scrapy.core.scheduler.ScrapyPriorityQueue.crawler>`
instead.
The following changes affect specifically the
:class:`~scrapy.core.scheduler.DownloaderAwarePriorityQueue` class and may
affect subclasses:
* A new :attr:`~scrapy.core.scheduler.DownloaderAwarePriorityQueue.pqueues`
attribute offers a mapping of downloader slot names to the
corresponding instances of
:attr:`~scrapy.core.scheduler.DownloaderAwarePriorityQueue.downstream_queue_cls`.
(:issue:`3884`)
.. _release-1.8.0:
@ -26,7 +470,7 @@ Backward-incompatible changes
* Python 3.4 is no longer supported, and some of the minimum requirements of
Scrapy have also changed:
* cssselect_ 0.9.1
* :doc:`cssselect <cssselect:index>` 0.9.1
* cryptography_ 2.0
* lxml_ 3.5.0
* pyOpenSSL_ 16.2.0
@ -288,12 +732,12 @@ Backward-incompatible changes
:class:`~scrapy.http.Request` objects instead of arbitrary Python data
structures.
* An additional ``crawler`` parameter has been added to the ``__init__`` method
of the :class:`scrapy.core.scheduler.Scheduler` class.
Custom scheduler subclasses which don't accept arbitrary parameters in
their ``__init__`` method might break because of this change.
* An additional ``crawler`` parameter has been added to the ``__init__``
method of the :class:`~scrapy.core.scheduler.Scheduler` class. Custom
scheduler subclasses which don't accept arbitrary parameters in their
``__init__`` method might break because of this change.
For more information, refer to the documentation for the :setting:`SCHEDULER` setting.
For more information, see :setting:`SCHEDULER`.
See also :ref:`1.7-deprecation-removals` below.
@ -1076,7 +1520,7 @@ Cleanups & Refactoring
~~~~~~~~~~~~~~~~~~~~~~
- Tests: remove temp files and folders (:issue:`2570`),
fixed ProjectUtilsTest on OS X (:issue:`2569`),
fixed ProjectUtilsTest on macOS (:issue:`2569`),
use portable pypy for Linux on Travis CI (:issue:`2710`)
- Separate building request from ``_requests_to_follow`` in CrawlSpider (:issue:`2562`)
- Remove “Python 3 progress” badge (:issue:`2567`)
@ -1616,7 +2060,7 @@ Deprecations and Removals
+ ``scrapy.utils.datatypes.SiteNode``
- The previously bundled ``scrapy.xlib.pydispatch`` library was deprecated and
replaced by `pydispatcher <https://pypi.python.org/pypi/PyDispatcher>`_.
replaced by `pydispatcher <https://pypi.org/project/PyDispatcher/>`_.
Relocations
@ -1645,7 +2089,7 @@ Bugfixes
- Makes ``_monkeypatches`` more robust (:issue:`1634`).
- Fixed bug on ``XMLItemExporter`` with non-string fields in
items (:issue:`1738`).
- Fixed startproject command in OS X (:issue:`1635`).
- Fixed startproject command in macOS (:issue:`1635`).
- Fixed :class:`~scrapy.exporters.PythonItemExporter` and CSVExporter for
non-string item types (:issue:`1737`).
- Various logging related fixes (:issue:`1294`, :issue:`1419`, :issue:`1263`,
@ -1713,12 +2157,12 @@ Scrapy 1.0.4 (2015-12-30)
- Typos corrections (:commit:`7067117`)
- fix typos in downloader-middleware.rst and exceptions.rst, middlware -> middleware (:commit:`32f115c`)
- Add note to Ubuntu install section about Debian compatibility (:commit:`23fda69`)
- Replace alternative OSX install workaround with virtualenv (:commit:`98b63ee`)
- Replace alternative macOS install workaround with virtualenv (:commit:`98b63ee`)
- Reference Homebrew's homepage for installation instructions (:commit:`1925db1`)
- Add oldest supported tox version to contributing docs (:commit:`5d10d6d`)
- Note in install docs about pip being already included in python>=2.7.9 (:commit:`85c980e`)
- Add non-python dependencies to Ubuntu install section in the docs (:commit:`fbd010d`)
- Add OS X installation section to docs (:commit:`d8f4cba`)
- Add macOS installation section to docs (:commit:`d8f4cba`)
- DOC(ENH): specify path to rtd theme explicitly (:commit:`de73b1a`)
- minor: scrapy.Spider docs grammar (:commit:`1ddcc7b`)
- Make common practices sample code match the comments (:commit:`1b85bcf`)
@ -2450,7 +2894,7 @@ Other
~~~~~
- Dropped Python 2.6 support (:issue:`448`)
- Add `cssselect`_ python package as install dependency
- Add :doc:`cssselect <cssselect:index>` python package as install dependency
- Drop libxml2 and multi selector's backend support, `lxml`_ is required from now on.
- Minimum Twisted version increased to 10.0.0, dropped Twisted 8.0 support.
- Running test suite now requires ``mock`` python library (:issue:`390`)
@ -2571,7 +3015,7 @@ Scrapy 0.18.0 (released 2013-08-09)
- MetaRefreshMiddleware and RedirectMiddleware have different priorities to address #62
- added from_crawler method to spiders
- added system tests with mock server
- more improvements to Mac OS compatibility (thanks Alex Cepoi)
- more improvements to macOS compatibility (thanks Alex Cepoi)
- several more cleanups to singletons and multi-spider support (thanks Nicolas Ramirez)
- support custom download slots
- added --spider option to "shell" command.
@ -2647,7 +3091,7 @@ Scrapy 0.16.3 (released 2012-12-07)
- Remove concurrency limitation when using download delays and still ensure inter-request delays are enforced (:commit:`487b9b5`)
- add error details when image pipeline fails (:commit:`8232569`)
- improve mac os compatibility (:commit:`8dcf8aa`)
- improve macOS compatibility (:commit:`8dcf8aa`)
- setup.py: use README.rst to populate long_description (:commit:`7b5310d`)
- doc: removed obsolete references to ClientForm (:commit:`80f9bb6`)
- correct docs for default storage backend (:commit:`2aa491b`)
@ -3047,17 +3491,16 @@ Scrapy 0.7
First release of Scrapy.
.. _AJAX crawleable urls: https://developers.google.com/webmasters/ajax-crawling/docs/getting-started?csw=1
.. _AJAX crawleable urls: https://developers.google.com/search/docs/ajax-crawling/docs/getting-started?csw=1
.. _botocore: https://github.com/boto/botocore
.. _chunked transfer encoding: https://en.wikipedia.org/wiki/Chunked_transfer_encoding
.. _ClientForm: http://wwwsearch.sourceforge.net/old/ClientForm/
.. _Creating a pull request: https://help.github.com/en/articles/creating-a-pull-request
.. _cryptography: https://cryptography.io/en/latest/
.. _cssselect: https://github.com/scrapy/cssselect/
.. _docstrings: https://docs.python.org/glossary.html#term-docstring
.. _KeyboardInterrupt: https://docs.python.org/library/exceptions.html#KeyboardInterrupt
.. _docstrings: https://docs.python.org/3/glossary.html#term-docstring
.. _KeyboardInterrupt: https://docs.python.org/3/library/exceptions.html#KeyboardInterrupt
.. _LevelDB: https://github.com/google/leveldb
.. _lxml: http://lxml.de/
.. _lxml: https://lxml.de/
.. _marshal: https://docs.python.org/2/library/marshal.html
.. _parsel.csstranslator.GenericTranslator: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.csstranslator.GenericTranslator
.. _parsel.csstranslator.HTMLTranslator: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.csstranslator.HTMLTranslator
@ -3068,11 +3511,11 @@ First release of Scrapy.
.. _queuelib: https://github.com/scrapy/queuelib
.. _registered with IANA: https://www.iana.org/assignments/media-types/media-types.xhtml
.. _resource: https://docs.python.org/2/library/resource.html
.. _robots.txt: http://www.robotstxt.org/
.. _robots.txt: https://www.robotstxt.org/
.. _scrapely: https://github.com/scrapy/scrapely
.. _service_identity: https://service-identity.readthedocs.io/en/stable/
.. _six: https://six.readthedocs.io/
.. _tox: https://pypi.python.org/pypi/tox
.. _tox: https://pypi.org/project/tox/
.. _Twisted: https://twistedmatrix.com/trac/
.. _Twisted - hello, asynchronous programming: http://jessenoller.com/blog/2009/02/11/twisted-hello-asynchronous-programming/
.. _w3lib: https://github.com/scrapy/w3lib

docs/topics/asyncio.rst (new file, 28 lines added)
View File

@ -0,0 +1,28 @@
=======
asyncio
=======
.. versionadded:: 2.0
Scrapy has partial support for :mod:`asyncio`. After you :ref:`install the asyncio
reactor <install-asyncio>`, you may use :mod:`asyncio` and
:mod:`asyncio`-powered libraries in any :doc:`coroutine <coroutines>`.
.. warning:: :mod:`asyncio` support in Scrapy is experimental. Future Scrapy
versions may introduce related changes without a deprecation
period or warning.
.. _install-asyncio:
Installing the asyncio reactor
==============================
To enable :mod:`asyncio` support, set the :setting:`TWISTED_REACTOR` setting to
``'twisted.internet.asyncioreactor.AsyncioSelectorReactor'``.
If you are using :class:`~scrapy.crawler.CrawlerRunner`, you also need to
install the :class:`~twisted.internet.asyncioreactor.AsyncioSelectorReactor`
reactor manually. You can do that using
:func:`~scrapy.utils.reactor.install_reactor`::
install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
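For a project run through the ``scrapy`` CLI, setting the option in
``settings.py`` is enough; a minimal sketch::

    # settings.py -- enables the asyncio reactor for crawls started via the CLI
    TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'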

View File

@ -188,7 +188,7 @@ AjaxCrawlMiddleware helps to crawl them correctly.
It is turned OFF by default because it has some performance overhead,
and enabling it for focused crawls doesn't make much sense.
.. _ajax crawlable: https://developers.google.com/webmasters/ajax-crawling/docs/getting-started
.. _ajax crawlable: https://developers.google.com/search/docs/ajax-crawling/docs/getting-started
.. _broad-crawls-bfo:

docs/topics/coroutines.rst (new file, 110 lines added)
View File

@ -0,0 +1,110 @@
==========
Coroutines
==========
.. versionadded:: 2.0
Scrapy has :ref:`partial support <coroutine-support>` for the
:ref:`coroutine syntax <async>`.
.. warning:: :mod:`asyncio` support in Scrapy is experimental. Future Scrapy
versions may introduce related API and behavior changes without a
deprecation period or warning.
.. _coroutine-support:
Supported callables
===================
The following callables may be defined as coroutines using ``async def``, and
hence use coroutine syntax (e.g. ``await``, ``async for``, ``async with``):
- :class:`~scrapy.http.Request` callbacks.
The following are known caveats of the current implementation that we aim
to address in future versions of Scrapy:
- The callback output is not processed until the whole callback finishes.
As a side effect, if the callback raises an exception, none of its
output is processed.
- Because `asynchronous generators were introduced in Python 3.6`_, you
can only use ``yield`` if you are using Python 3.6 or later.
If you need to output multiple items or requests and you are using
Python 3.5, return an iterable (e.g. a list) instead.
- The :meth:`process_item` method of
:ref:`item pipelines <topics-item-pipeline>`.
- The
:meth:`~scrapy.downloadermiddlewares.DownloaderMiddleware.process_request`,
:meth:`~scrapy.downloadermiddlewares.DownloaderMiddleware.process_response`,
and
:meth:`~scrapy.downloadermiddlewares.DownloaderMiddleware.process_exception`
methods of
:ref:`downloader middlewares <topics-downloader-middleware-custom>`.
- :ref:`Signal handlers that support deferreds <signal-deferred>`.
.. _asynchronous generators were introduced in Python 3.6: https://www.python.org/dev/peps/pep-0525/
Usage
=====
There are several use cases for coroutines in Scrapy. Code that would
return Deferreds when written for previous Scrapy versions, such as downloader
middlewares and signal handlers, can be rewritten to be shorter and cleaner::
class DbPipeline:
def _update_item(self, data, item):
item['field'] = data
return item
def process_item(self, item, spider):
dfd = db.get_some_data(item['id'])
dfd.addCallback(self._update_item, item)
return dfd
becomes::
class DbPipeline:
async def process_item(self, item, spider):
item['field'] = await db.get_some_data(item['id'])
return item
Coroutines may be used to call asynchronous code. This includes other
coroutines, functions that return Deferreds and functions that return
`awaitable objects`_ such as :class:`~asyncio.Future`. This means you can use
many useful Python libraries providing such code::
class MySpider(Spider):
# ...
async def parse_with_deferred(self, response):
additional_response = await treq.get('https://additional.url')
additional_data = await treq.content(additional_response)
# ... use response and additional_data to yield items and requests
async def parse_with_asyncio(self, response):
async with aiohttp.ClientSession() as session:
async with session.get('https://additional.url') as additional_response:
additional_data = await additional_response.text()
# ... use response and additional_data to yield items and requests
.. note:: Many libraries that use coroutines, such as `aio-libs`_, require the
:mod:`asyncio` loop, and to use them you need to
:doc:`enable asyncio support in Scrapy <asyncio>`.
Common use cases for asynchronous code include:
* requesting data from websites, databases and other services (in callbacks,
pipelines and middlewares);
* storing data in databases (in pipelines and middlewares);
* delaying the spider initialization until some external event (in the
:signal:`spider_opened` handler);
* calling asynchronous Scrapy methods like ``ExecutionEngine.download`` (see
:ref:`the screenshot pipeline example <ScreenshotPipeline>`).
.. _aio-libs: https://github.com/aio-libs
.. _awaitable objects: https://docs.python.org/3/glossary.html#term-awaitable

View File

@ -709,7 +709,7 @@ HttpCompressionMiddleware
provided `brotlipy`_ is installed.
.. _brotli-compressed: https://www.ietf.org/rfc/rfc7932.txt
.. _brotlipy: https://pypi.python.org/pypi/brotlipy
.. _brotlipy: https://pypi.org/project/brotlipy/
HttpCompressionMiddleware Settings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -872,6 +872,10 @@ Default: ``[]``
Meta tags within these tags are ignored.
.. versionchanged:: 2.0
The default value of :setting:`METAREFRESH_IGNORE_TAGS` changed from
``['script', 'noscript']`` to ``[]``.
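Projects that relied on the previous behavior can restore it explicitly; a
hedged ``settings.py`` sketch::

    # Restore the pre-2.0 default, which ignored <meta refresh> tags found
    # inside <script> and <noscript> elements
    METAREFRESH_IGNORE_TAGS = ['script', 'noscript']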
.. setting:: METAREFRESH_MAXDELAY
METAREFRESH_MAXDELAY
@ -1038,7 +1042,7 @@ Based on `RobotFileParser
* is Python's built-in robots.txt_ parser
* is compliant with `Martijn Koster's 1996 draft specification
<http://www.robotstxt.org/norobots-rfc.txt>`_
<https://www.robotstxt.org/norobots-rfc.txt>`_
* lacks support for wildcard matching
@ -1061,7 +1065,7 @@ Based on `Reppy <https://github.com/seomoz/reppy/>`_:
<https://github.com/seomoz/rep-cpp>`_
* is compliant with `Martijn Koster's 1996 draft specification
<http://www.robotstxt.org/norobots-rfc.txt>`_
<https://www.robotstxt.org/norobots-rfc.txt>`_
* supports wildcard matching
@ -1086,7 +1090,7 @@ Based on `Robotexclusionrulesparser <http://nikitathespider.com/python/rerp/>`_:
* implemented in Python
* is compliant with `Martijn Koster's 1996 draft specification
<http://www.robotstxt.org/norobots-rfc.txt>`_
<https://www.robotstxt.org/norobots-rfc.txt>`_
* supports wildcard matching
@ -1115,7 +1119,7 @@ implementing the methods described below.
.. autoclass:: RobotParser
:members:
.. _robots.txt: http://www.robotstxt.org/
.. _robots.txt: https://www.robotstxt.org/
DownloaderStats
---------------
@ -1155,7 +1159,7 @@ AjaxCrawlMiddleware
Middleware that finds 'AJAX crawlable' page variants based
on meta-fragment html tag. See
https://developers.google.com/webmasters/ajax-crawling/docs/getting-started
https://developers.google.com/search/docs/ajax-crawling/docs/getting-started
for more info.
.. note::

View File

@ -241,12 +241,12 @@ along with `scrapy-selenium`_ for seamless integration.
.. _headless browser: https://en.wikipedia.org/wiki/Headless_browser
.. _JavaScript: https://en.wikipedia.org/wiki/JavaScript
.. _js2xml: https://github.com/scrapinghub/js2xml
.. _json.loads: https://docs.python.org/library/json.html#json.loads
.. _json.loads: https://docs.python.org/3/library/json.html#json.loads
.. _pytesseract: https://github.com/madmaze/pytesseract
.. _regular expression: https://docs.python.org/library/re.html
.. _regular expression: https://docs.python.org/3/library/re.html
.. _scrapy-selenium: https://github.com/clemfromspace/scrapy-selenium
.. _scrapy-splash: https://github.com/scrapy-plugins/scrapy-splash
.. _Selenium: https://www.seleniumhq.org/
.. _Selenium: https://www.selenium.dev/
.. _Splash: https://github.com/scrapinghub/splash
.. _tabula-py: https://github.com/chezou/tabula-py
.. _wget: https://www.gnu.org/software/wget/

View File

@ -137,7 +137,7 @@ output examples, which assume you're exporting these two items::
BaseItemExporter
----------------
.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent=0)
.. class:: BaseItemExporter(fields_to_export=None, export_empty_fields=False, encoding='utf-8', indent=0, dont_fail=False)
This is the (abstract) base class for all Item Exporters. It provides
support for common features used by all (concrete) Item Exporters, such as
@ -148,6 +148,9 @@ BaseItemExporter
populate their respective instance attributes: :attr:`fields_to_export`,
:attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent`.
.. versionadded:: 2.0
The *dont_fail* parameter.
.. method:: export_item(item)
Exports the given item. This method must be implemented in subclasses.
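A hedged sketch of a subclass following the new ``__init__`` conventions (the
exporter itself is made up)::

    from scrapy.exporters import BaseItemExporter

    class StdoutItemExporter(BaseItemExporter):
        # Hypothetical exporter, shown only to illustrate the new-style __init__
        def __init__(self, **kwargs):
            # dont_fail=True keeps unrecognized keyword arguments in
            # self._kwargs instead of raising, as described in the 2.0 notes
            super().__init__(dont_fail=True, **kwargs)

        def export_item(self, item):
            print(dict(item))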

View File

@ -236,6 +236,9 @@ supported URI schemes.
This setting is required for enabling the feed exports.
.. versionchanged:: 2.0
Added :class:`pathlib.Path` support.
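For example, in ``settings.py`` (the output path is arbitrary)::

    from pathlib import Path

    # Since 2.0, FEED_URI also accepts pathlib.Path objects
    FEED_URI = Path('output/items.jl')
    FEED_FORMAT = 'jsonlines'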
.. setting:: FEED_FORMAT
FEED_FORMAT

View File

@ -158,18 +158,20 @@ method and how to clean up the resources properly.::
self.db[self.collection_name].insert_one(dict(item))
return item
.. _MongoDB: https://www.mongodb.org/
.. _pymongo: https://api.mongodb.org/python/current/
.. _MongoDB: https://www.mongodb.com/
.. _pymongo: https://api.mongodb.com/python/current/
.. _ScreenshotPipeline:
Take screenshot of item
-----------------------
This example demonstrates how to return a
:class:`~twisted.internet.defer.Deferred` from the :meth:`process_item` method.
It uses Splash_ to render a screenshot of the item URL. The pipeline
makes request to locally running instance of Splash_. After request is downloaded
and Deferred callback fires, it saves item to a file and adds filename to an item.
makes a request to a locally running instance of Splash_. After the request is downloaded,
it saves the screenshot to a file and adds the filename to the item.
::
@ -184,15 +186,12 @@ and Deferred callback fires, it saves item to a file and adds filename to an ite
SPLASH_URL = "http://localhost:8050/render.png?url={}"
def process_item(self, item, spider):
async def process_item(self, item, spider):
encoded_item_url = quote(item["url"])
screenshot_url = self.SPLASH_URL.format(encoded_item_url)
request = scrapy.Request(screenshot_url)
dfd = spider.crawler.engine.download(request, spider)
dfd.addBoth(self.return_item, item)
return dfd
response = await spider.crawler.engine.download(request, spider)
def return_item(self, response, item):
if response.status != 200:
# Error happened, return item.
return item
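Reassembled from the fragments above and the pre-existing example, the updated
pipeline looks roughly like this (a sketch; the hashing and field names follow
the surrounding example)::

    import hashlib
    from urllib.parse import quote

    import scrapy
    from scrapy.utils.python import to_bytes

    class ScreenshotPipeline(object):
        """Pipeline that uses Splash to render a screenshot of every item URL."""

        SPLASH_URL = "http://localhost:8050/render.png?url={}"

        async def process_item(self, item, spider):
            encoded_item_url = quote(item["url"])
            screenshot_url = self.SPLASH_URL.format(encoded_item_url)
            request = scrapy.Request(screenshot_url)
            response = await spider.crawler.engine.download(request, spider)

            if response.status != 200:
                # Error happened, return item.
                return item

            # Save screenshot to file, filename will be hash of url.
            url_hash = hashlib.md5(to_bytes(item["url"])).hexdigest()
            filename = "{}.png".format(url_hash)
            with open(filename, "wb") as f:
                f.write(response.body)

            # Store filename in item.
            item["screenshot_filename"] = filename
            return item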

View File

@ -166,7 +166,7 @@ If your item contains mutable_ values like lists or dictionaries, a shallow
copy will keep references to the same mutable values across all different
copies.
.. _mutable: https://docs.python.org/glossary.html#term-mutable
.. _mutable: https://docs.python.org/3/glossary.html#term-mutable
For example, if you have an item with a list of tags, and you create a shallow
copy of that item, both the original item and the copy have the same list of
@ -177,7 +177,7 @@ If that is not the desired behavior, use a deep copy instead.
See the `documentation of the copy module`_ for more information.
.. _documentation of the copy module: https://docs.python.org/library/copy.html
.. _documentation of the copy module: https://docs.python.org/3/library/copy.html
To create a shallow copy of an item, you can either call
:meth:`~scrapy.item.Item.copy` on an existing item

View File

@ -68,6 +68,9 @@ Cookies may expire. So, if you don't resume your spider quickly the requests
scheduled may no longer work. This won't be an issue if your spider doesn't rely
on cookies.
.. _request-serialization:
Request serialization
---------------------

View File

@ -206,7 +206,7 @@ objects. If this is your case, and you can't find your leaks using ``trackref``,
you still have another resource: the `Guppy library`_.
If you're using Python 3, see :ref:`topics-leaks-muppy`.
.. _Guppy library: https://pypi.python.org/pypi/guppy
.. _Guppy library: https://pypi.org/project/guppy/
If you use ``pip``, you can install Guppy with the following command::
@ -311,9 +311,9 @@ though neither Scrapy nor your project are leaking memory. This is due to a
(not so well) known problem of Python, which may not return released memory to
the operating system in some cases. For more information on this issue see:
* `Python Memory Management <http://www.evanjones.ca/python-memory.html>`_
* `Python Memory Management Part 2 <http://www.evanjones.ca/python-memory-part2.html>`_
* `Python Memory Management Part 3 <http://www.evanjones.ca/python-memory-part3.html>`_
* `Python Memory Management <https://www.evanjones.ca/python-memory.html>`_
* `Python Memory Management Part 2 <https://www.evanjones.ca/python-memory-part2.html>`_
* `Python Memory Management Part 3 <https://www.evanjones.ca/python-memory-part3.html>`_
The improvements proposed by Evan Jones, which are detailed in `this paper`_,
got merged in Python 2.5, but this only reduces the problem, it doesn't fix it
@ -327,7 +327,7 @@ completely. To quote the paper:
to move to a compacting garbage collector, which is able to move objects in
memory. This would require significant changes to the Python interpreter.*
.. _this paper: http://www.evanjones.ca/memoryallocator/
.. _this paper: https://www.evanjones.ca/memoryallocator/
To keep memory consumption reasonable you can split the job into several
smaller jobs or enable :ref:`persistent job queue <topics-jobs>`

View File

@ -64,9 +64,13 @@ LxmlLinkExtractor
:param deny_extensions: a single value or list of strings containing
extensions that should be ignored when extracting links.
If not given, it will default to the
``IGNORED_EXTENSIONS`` list defined in the
`scrapy.linkextractors`_ package.
If not given, it will default to
:data:`scrapy.linkextractors.IGNORED_EXTENSIONS`.
.. versionchanged:: 2.0
:data:`~scrapy.linkextractors.IGNORED_EXTENSIONS` now includes
``7z``, ``7zip``, ``apk``, ``bz2``, ``cdr``, ``dmg``, ``ico``,
``iso``, ``tar``, ``tar.gz``, ``webm``, and ``xz``.
:type deny_extensions: list
:param restrict_xpaths: is an XPath (or list of XPath's) which defines

View File

@ -136,6 +136,9 @@ with the data to be parsed, and return a parsed value. So you can use any
function as input or output processor. The only requirement is that they must
accept one (and only one) positional argument, which will be an iterable.
.. versionchanged:: 2.0
Processors no longer need to be methods.
.. note:: Both input and output processors must receive an iterable as their
first argument. The output of those functions can be anything. The result of
input processors will be appended to an internal list (in the Loader)

View File

@ -116,12 +116,6 @@ For the Images Pipeline, set the :setting:`IMAGES_STORE` setting::
Supported Storage
=================
File system is currently the only officially supported storage, but there are
also support for storing files in `Amazon S3`_ and `Google Cloud Storage`_.
.. _Amazon S3: https://aws.amazon.com/s3/
.. _Google Cloud Storage: https://cloud.google.com/storage/
File system storage
-------------------
@ -147,9 +141,13 @@ Where:
* ``full`` is a sub-directory to separate full images from thumbnails (if
used). For more info see :ref:`topics-images-thumbnails`.
.. _media-pipeline-ftp:
FTP server storage
------------------
.. versionadded:: 2.0
:setting:`FILES_STORE` and :setting:`IMAGES_STORE` can point to an FTP server.
Scrapy will automatically upload the files to the server.
@ -573,6 +571,8 @@ See here the methods that you can override in your custom Images Pipeline:
By default, the :meth:`item_completed` method returns the item.
.. _media-pipeline-example:
Custom Images pipeline example
==============================

View File

@ -31,6 +31,8 @@ Request objects
a :class:`Response`.
:param url: the URL of this request
If the URL is invalid, a :exc:`ValueError` exception is raised.
:type url: string
:param callback: the function that will be called with the response of this
@ -125,6 +127,10 @@ Request objects
:exc:`~twisted.python.failure.Failure` as first parameter.
For more information,
see :ref:`topics-request-response-ref-errbacks` below.
.. versionchanged:: 2.0
The *callback* parameter is no longer required when the *errback*
parameter is specified.
:type errback: callable
:param flags: Flags sent to the request, can be used for logging or similar purposes.
@ -396,7 +402,7 @@ The FormRequest class extends the base :class:`Request` with functionality for
dealing with HTML forms. It uses `lxml.html forms`_ to pre-populate form
fields with form data from :class:`Response` objects.
.. _lxml.html forms: http://lxml.de/lxmlhtml.html#forms
.. _lxml.html forms: https://lxml.de/lxmlhtml.html#forms
.. class:: FormRequest(url, [formdata, ...])
@ -680,6 +686,8 @@ Response objects
.. attribute:: Response.cb_kwargs
.. versionadded:: 2.0
A shortcut to the :attr:`Request.cb_kwargs` attribute of the
:attr:`Response.request` object (i.e. ``self.request.cb_kwargs``).
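A short sketch tying the 2.0 additions above together (an errback given
without a callback, and ``Response.cb_kwargs``); the spider and URL are
hypothetical::

    import scrapy

    class ExampleSpider(scrapy.Spider):
        name = 'example'

        def start_requests(self):
            # Since 2.0 an errback may be given without a callback; successful
            # responses then go to the default parse() callback
            yield scrapy.Request(
                'https://example.com',
                errback=self.on_error,
                cb_kwargs={'source': 'start_requests'},
            )

        def parse(self, response, source):
            # Response.cb_kwargs (new in 2.0) is a shortcut for
            # response.request.cb_kwargs
            assert response.cb_kwargs == {'source': source}
            yield {'url': response.url, 'source': source}

        def on_error(self, failure):
            self.logger.error(repr(failure))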

View File

@ -35,12 +35,11 @@ defines selectors to associate those styles with specific HTML elements.
in speed and parsing accuracy to lxml.
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/
.. _lxml: http://lxml.de/
.. _lxml: https://lxml.de/
.. _ElementTree: https://docs.python.org/2/library/xml.etree.elementtree.html
.. _cssselect: https://pypi.python.org/pypi/cssselect/
.. _XPath: https://www.w3.org/TR/xpath
.. _XPath: https://www.w3.org/TR/xpath/all/
.. _CSS: https://www.w3.org/TR/selectors
.. _parsel: https://parsel.readthedocs.io/
.. _parsel: https://parsel.readthedocs.io/en/latest/
Using selectors
===============
@ -255,7 +254,7 @@ that Scrapy (parsel) implements a couple of **non-standard pseudo-elements**:
They will most probably not work with other libraries like
`lxml`_ or `PyQuery`_.
.. _PyQuery: https://pypi.python.org/pypi/pyquery
.. _PyQuery: https://pypi.org/project/pyquery/
Examples:
@ -309,7 +308,7 @@ Examples:
make much sense: text nodes do not have attributes, and attribute values
are string values already and do not have children nodes.
.. _CSS Selectors: https://www.w3.org/TR/css3-selectors/#selectors
.. _CSS Selectors: https://www.w3.org/TR/selectors-3/#selectors
.. _topics-selectors-nesting-selectors:
@ -504,7 +503,7 @@ Another common case would be to extract all direct ``<p>`` children:
For more details about relative XPaths see the `Location Paths`_ section in the
XPath specification.
.. _Location Paths: https://www.w3.org/TR/xpath#location-paths
.. _Location Paths: https://www.w3.org/TR/xpath/all/#location-paths
When querying by class, consider using CSS
------------------------------------------
@ -612,7 +611,7 @@ But using the ``.`` to mean the node, works:
>>> sel.xpath("//a[contains(., 'Next Page')]").getall()
['<a href="#">Click here to go to the <strong>Next Page</strong></a>']
.. _`XPath string function`: https://www.w3.org/TR/xpath/#section-String-Functions
.. _`XPath string function`: https://www.w3.org/TR/xpath/all/#section-String-Functions
.. _topics-selectors-xpath-variables:
@ -764,7 +763,7 @@ Set operations
These can be handy for excluding parts of a document tree before
extracting text elements for example.
Example extracting microdata (sample content taken from http://schema.org/Product)
Example extracting microdata (sample content taken from https://schema.org/Product)
with groups of itemscopes and corresponding itemprops::
>>> doc = u"""

View File

@ -381,6 +381,8 @@ DNS in-memory cache size.
DNS_RESOLVER
------------
.. versionadded:: 2.0
Default: ``'scrapy.resolver.CachingThreadedResolver'``
The class to be used to resolve DNS names. The default ``scrapy.resolver.CachingThreadedResolver``
@ -1258,6 +1260,9 @@ does not work together with :setting:`CONCURRENT_REQUESTS_PER_IP`.
SCRAPER_SLOT_MAX_ACTIVE_SIZE
----------------------------
.. versionadded:: 2.0
Default: ``5_000_000``
Soft limit (in bytes) for response data being processed.
@ -1447,24 +1452,36 @@ in the ``project`` subdirectory.
TWISTED_REACTOR
---------------
.. versionadded:: 2.0
Default: ``None``
Import path of a given Twisted reactor, for instance:
:class:`twisted.internet.asyncioreactor.AsyncioSelectorReactor`.
Import path of a given :mod:`~twisted.internet.reactor`.
Scrapy will install this reactor if no other is installed yet, such as when
the ``scrapy`` CLI program is invoked or when using the
:class:`~scrapy.crawler.CrawlerProcess` class. If you are using the
:class:`~scrapy.crawler.CrawlerRunner` class, you need to install the correct
reactor manually. An exception will be raised if the installation fails.
Scrapy will install this reactor if no other reactor is installed yet, such as
when the ``scrapy`` CLI program is invoked or when using the
:class:`~scrapy.crawler.CrawlerProcess` class.
The default value for this option is currently ``None``, which means that Scrapy
will not attempt to install any specific reactor, and the default one defined by
Twisted for the current platform will be used. This is to maintain backward
compatibility and avoid possible problems caused by using a non-default reactor.
If you are using the :class:`~scrapy.crawler.CrawlerRunner` class, you also
need to install the correct reactor manually. You can do that using
:func:`~scrapy.utils.reactor.install_reactor`:
For additional information, please see
:doc:`core/howto/choosing-reactor`.
.. autofunction:: scrapy.utils.reactor.install_reactor
If a reactor is already installed,
:func:`~scrapy.utils.reactor.install_reactor` has no effect.
:meth:`CrawlerRunner.__init__ <scrapy.crawler.CrawlerRunner.__init__>` raises
:exc:`Exception` if the installed reactor does not match the
:setting:`TWISTED_REACTOR` setting.
The default value of the :setting:`TWISTED_REACTOR` setting is ``None``, which
means that Scrapy will not attempt to install any specific reactor, and the
default reactor defined by Twisted for the current platform will be used. This
is to maintain backward compatibility and avoid possible problems caused by
using a non-default reactor.
For additional information, see :doc:`core/howto/choosing-reactor`.
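A minimal sketch of a script that uses :class:`~scrapy.crawler.CrawlerRunner`
together with the asyncio reactor (``MySpider`` is a placeholder for your own
spider class)::

    from scrapy.crawler import CrawlerRunner
    from scrapy.utils.log import configure_logging
    from scrapy.utils.reactor import install_reactor

    # CrawlerRunner does not install a reactor, so install the one matching
    # the TWISTED_REACTOR setting before importing twisted.internet.reactor
    install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
    from twisted.internet import reactor

    configure_logging()
    runner = CrawlerRunner(settings={
        'TWISTED_REACTOR': 'twisted.internet.asyncioreactor.AsyncioSelectorReactor',
    })
    d = runner.crawl(MySpider)  # MySpider: your spider class
    d.addBoth(lambda _: reactor.stop())
    reactor.run()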
.. setting:: URLLENGTH_LIMIT

View File

@ -41,7 +41,7 @@ variable; or by defining it in your :ref:`scrapy.cfg <topics-config-settings>`::
.. _IPython: https://ipython.org/
.. _IPython installation guide: https://ipython.org/install.html
.. _bpython: https://www.bpython-interpreter.org/
.. _bpython: https://bpython-interpreter.org/
Launch the shell
================
@ -142,7 +142,7 @@ Example of shell session
========================
Here's an example of a typical shell session where we start by scraping the
https://scrapy.org page, and then proceed to scrape the https://reddit.com
https://scrapy.org page, and then proceed to scrape the https://old.reddit.com/
page. Finally, we modify the (Reddit) request method to POST and re-fetch it
getting an error. We end the session by typing Ctrl-D (in Unix systems) or
Ctrl-Z in Windows.
@ -182,7 +182,7 @@ After that, we can start playing with the objects:
>>> response.xpath('//title/text()').get()
'Scrapy | A Fast and Powerful Scraping and Web Crawling Framework'
>>> fetch("https://reddit.com")
>>> fetch("https://old.reddit.com/")
>>> response.xpath('//title/text()').get()
'reddit: the front page of the internet'

View File

@ -46,6 +46,7 @@ Here is a simple example showing how you can catch signals and perform some acti
def parse(self, response):
pass
.. _signal-deferred:
Deferred signal handlers
========================
@ -301,6 +302,8 @@ request_left_downloader
.. signal:: request_left_downloader
.. function:: request_left_downloader(request, spider)
.. versionadded:: 2.0
Sent when a :class:`~scrapy.http.Request` leaves the downloader, even in case of
failure.
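A minimal sketch of catching this signal from a spider (the spider and handler
names are hypothetical)::

    import scrapy
    from scrapy import signals

    class MonitoredSpider(scrapy.Spider):
        name = 'monitored'

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            crawler.signals.connect(spider.request_left,
                                    signal=signals.request_left_downloader)
            return spider

        def request_left(self, request, spider):
            # Called for every request that leaves the downloader,
            # whether it succeeded or failed.
            spider.logger.debug('Left downloader: %s', request)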

View File

@ -299,8 +299,8 @@ The spider will not do any parsing on its own.
If you were to set the ``start_urls`` attribute from the command line,
you would have to parse it on your own into a list
using something like
`ast.literal_eval <https://docs.python.org/library/ast.html#ast.literal_eval>`_
or `json.loads <https://docs.python.org/library/json.html#json.loads>`_
`ast.literal_eval <https://docs.python.org/3/library/ast.html#ast.literal_eval>`_
or `json.loads <https://docs.python.org/3/library/json.html#json.loads>`_
and then set it as an attribute.
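For example, a minimal sketch (the spider name and the exact argument format are
hypothetical)::

    import json

    import scrapy

    class CommandLineSpider(scrapy.Spider):
        # Run as: scrapy crawl cli_urls -a start_urls='["https://example.com"]'
        name = 'cli_urls'

        def __init__(self, start_urls=None, *args, **kwargs):
            super().__init__(*args, **kwargs)
            if isinstance(start_urls, str):
                # Arguments passed with -a are always strings.
                self.start_urls = json.loads(start_urls)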
Otherwise, you would cause iteration over a ``start_urls`` string
(a very common Python pitfall)
@ -420,6 +420,9 @@ Crawling rules
It receives a :class:`Twisted Failure <twisted.python.failure.Failure>`
instance as first parameter.
.. versionadded:: 2.0
The *errback* parameter.
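For example, a minimal sketch of a rule with an errback (the spider name, URL
pattern and method names are hypothetical)::

    from scrapy.linkextractors import LinkExtractor
    from scrapy.spiders import CrawlSpider, Rule

    class ItemCrawlSpider(CrawlSpider):
        name = 'items'
        start_urls = ['https://example.com']

        rules = (
            Rule(LinkExtractor(allow=r'/item/'),
                 callback='parse_item', errback='handle_error'),
        )

        def parse_item(self, response):
            yield {'url': response.url}

        def handle_error(self, failure):
            # failure is a twisted.python.failure.Failure
            self.logger.error(repr(failure))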
CrawlSpider example
~~~~~~~~~~~~~~~~~~~
@ -811,6 +814,6 @@ Combine SitemapSpider with other sources of urls::
.. _Sitemaps: https://www.sitemaps.org/index.html
.. _Sitemap index files: https://www.sitemaps.org/protocol.html#index
.. _robots.txt: http://www.robotstxt.org/
.. _robots.txt: https://www.robotstxt.org/
.. _TLD: https://en.wikipedia.org/wiki/Top-level_domain
.. _Scrapyd documentation: https://scrapyd.readthedocs.io/en/latest/

View File

@ -1 +1 @@
1.8.0
2.0.0

View File

@ -54,8 +54,13 @@ class Command(ScrapyCommand):
raise UsageError("running 'scrapy crawl' with more than one spider is no longer supported")
spname = args[0]
self.crawler_process.crawl(spname, **opts.spargs)
self.crawler_process.start()
crawl_defer = self.crawler_process.crawl(spname, **opts.spargs)
if self.crawler_process.bootstrap_failed:
if getattr(crawl_defer, 'result', None) is not None and issubclass(crawl_defer.result.type, Exception):
self.exitcode = 1
else:
self.crawler_process.start()
if self.crawler_process.bootstrap_failed or \
(hasattr(self.crawler_process, 'has_exception') and self.crawler_process.has_exception):
self.exitcode = 1

View File

@ -23,7 +23,7 @@ __all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
class BaseItemExporter(object):
def __init__(self, dont_fail=False, **kwargs):
def __init__(self, *, dont_fail=False, **kwargs):
self._kwargs = kwargs
self._configure(kwargs, dont_fail=dont_fail)

View File

@ -47,7 +47,7 @@ class MemoryUsage(object):
def get_virtual_size(self):
size = self.resource.getrusage(self.resource.RUSAGE_SELF).ru_maxrss
if sys.platform != 'darwin':
# on Mac OS X ru_maxrss is in bytes, on Linux it is in KB
# on macOS ru_maxrss is in bytes, on Linux it is in KB
size *= 1024
return size
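A standalone sketch of the same normalization, for reference (it assumes the unit
behaviour described in the comment above)::

    import resource
    import sys

    size = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    if sys.platform != 'darwin':
        size *= 1024  # Linux reports KB, macOS reports bytes
    print('peak RSS: %d bytes' % size)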

View File

@ -132,6 +132,9 @@ class Response(object_ref):
:class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
method which supports selectors in addition to absolute/relative URLs
and Link objects.
.. versionadded:: 2.0
The *flags* parameter.
"""
if isinstance(url, Link):
url = url.url
@ -160,6 +163,8 @@ class Response(object_ref):
dont_filter=False, errback=None, cb_kwargs=None, flags=None):
# type: (...) -> Generator[Request, None, None]
"""
.. versionadded:: 2.0
Return an iterable of :class:`~.Request` instances to follow all links
in ``urls``. It accepts the same arguments as the ``Request.__init__`` method,
but elements of ``urls`` can be relative URLs or :class:`~scrapy.link.Link` objects,

View File

@ -97,7 +97,11 @@ class LogFormatter(object):
}
def item_error(self, item, exception, response, spider):
"""Logs a message when an item causes an error while it is passing through the item pipeline."""
"""Logs a message when an item causes an error while it is passing
through the item pipeline.
.. versionadded:: 2.0
"""
return {
'level': logging.ERROR,
'msg': ITEMERRORMSG,
@ -107,7 +111,10 @@ class LogFormatter(object):
}
def spider_error(self, failure, request, response, spider):
"""Logs an error message from a spider."""
"""Logs an error message from a spider.
.. versionadded:: 2.0
"""
return {
'level': logging.ERROR,
'msg': SPIDERERRORMSG,
@ -118,7 +125,11 @@ class LogFormatter(object):
}
def download_error(self, failure, request, spider, errmsg=None):
"""Logs a download error message from a spider (typically coming from the engine)."""
"""Logs a download error message from a spider (typically coming from
the engine).
.. versionadded:: 2.0
"""
args = {'request': request}
if errmsg:
msg = DOWNLOADERRORMSG_LONG
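These methods can be overridden from a custom ``LOG_FORMATTER`` class; a minimal
sketch (the class name and module path are hypothetical)::

    import logging

    from scrapy.logformatter import LogFormatter

    class QuietLogFormatter(LogFormatter):
        def item_error(self, item, exception, response, spider):
            # Reuse the default message but demote it to WARNING.
            entry = super().item_error(item, exception, response, spider)
            entry['level'] = logging.WARNING
            return entry

    # settings.py (hypothetical project):
    # LOG_FORMATTER = 'myproject.logformatters.QuietLogFormatter'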

View File

@ -29,7 +29,7 @@ class CachingThreadedResolver(ThreadedResolver):
cache_size = 0
return cls(reactor, cache_size, crawler.settings.getfloat('DNS_TIMEOUT'))
def install_on_reactor(self,):
def install_on_reactor(self):
self.reactor.installResolver(self)
def getHostByName(self, name, timeout=None):

View File

@ -9,10 +9,8 @@ DEPRECATED_SETTINGS = [
('ENCODING_ALIASES', 'no longer needed (encoding discovery uses w3lib now)'),
('STATS_ENABLED', 'no longer supported (change STATS_CLASS instead)'),
('SQLITE_DB', 'no longer supported'),
('SELECTORS_BACKEND', 'use SCRAPY_SELECTORS_BACKEND environment variable instead'),
('AUTOTHROTTLE_MIN_DOWNLOAD_DELAY', 'use DOWNLOAD_DELAY instead'),
('AUTOTHROTTLE_MAX_CONCURRENCY', 'use CONCURRENT_REQUESTS_PER_DOMAIN instead'),
('AUTOTHROTTLE_MAX_CONCURRENCY', 'use CONCURRENT_REQUESTS_PER_DOMAIN instead'),
('REDIRECT_MAX_METAREFRESH_DELAY', 'use METAREFRESH_MAXDELAY instead'),
('LOG_UNSERIALIZABLE_REQUESTS', 'use SCHEDULER_DEBUG instead'),
]

View File

@ -75,9 +75,24 @@ def get_project_settings():
"is deprecated.", ScrapyDeprecationWarning)
settings.setdict(pickle.loads(pickled_settings), priority='project')
env_overrides = {k[7:]: v for k, v in os.environ.items() if
k.startswith('SCRAPY_')}
if env_overrides:
warnings.warn("Use of 'SCRAPY_'-prefixed environment variables to override settings is deprecated.", ScrapyDeprecationWarning)
settings.setdict(env_overrides, priority='project')
scrapy_envvars = {k[7:]: v for k, v in os.environ.items() if
k.startswith('SCRAPY_')}
valid_envvars = {
'CHECK',
'PICKLED_SETTINGS_TO_OVERRIDE',
'PROJECT',
'PYTHON_SHELL',
'SETTINGS_MODULE',
}
setting_envvars = {k for k in scrapy_envvars if k not in valid_envvars}
if setting_envvars:
setting_envvar_list = ', '.join(sorted(setting_envvars))
warnings.warn(
'Use of environment variables prefixed with SCRAPY_ to override '
'settings is deprecated. The following environment variables are '
'currently defined: {}'.format(setting_envvar_list),
ScrapyDeprecationWarning
)
settings.setdict(scrapy_envvars, priority='project')
return settings
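The effect of this change, as a rough sketch (``SCRAPY_FOO`` is a made-up variable
name)::

    import os
    import warnings

    from scrapy.utils.project import get_project_settings

    os.environ['SCRAPY_FOO'] = 'bar'  # not in the whitelist above
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        settings = get_project_settings()

    # A ScrapyDeprecationWarning naming FOO is emitted, but the override
    # is still applied for backward compatibility.
    assert settings.get('FOO') == 'bar'
    assert any('FOO' in str(w.message) for w in caught)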

View File

@ -50,6 +50,8 @@ class CallLaterOnce(object):
def install_reactor(reactor_path):
"""Installs the :mod:`~twisted.internet.reactor` with the specified
import path."""
reactor_class = load_object(reactor_path)
if reactor_class is asyncioreactor.AsyncioSelectorReactor:
with suppress(error.ReactorAlreadyInstalledError):
@ -63,6 +65,9 @@ def install_reactor(reactor_path):
def verify_installed_reactor(reactor_path):
"""Raises :exc:`Exception` if the installed
:mod:`~twisted.internet.reactor` does not match the specified import
path."""
from twisted.internet import reactor
reactor_class = load_object(reactor_path)
if not isinstance(reactor, reactor_class):

View File

@ -0,0 +1,20 @@
import os
import sys
import unittest
from subprocess import Popen, PIPE
class CmdlineCrawlPipelineTest(unittest.TestCase):
def _execute(self, spname):
args = (sys.executable, '-m', 'scrapy.cmdline', 'crawl', spname)
cwd = os.path.dirname(os.path.abspath(__file__))
proc = Popen(args, stdout=PIPE, stderr=PIPE, cwd=cwd)
proc.communicate()
return proc.returncode
def test_open_spider_normally_in_pipeline(self):
self.assertEqual(self._execute('normal'), 0)
def test_exception_at_open_spider_in_pipeline(self):
self.assertEqual(self._execute('exception'), 1)

View File

@ -0,0 +1,2 @@
[settings]
default = test_spider.settings

View File

@ -0,0 +1,16 @@
class TestSpiderPipeline(object):
def open_spider(self, spider):
pass
def process_item(self, item, spider):
return item
class TestSpiderExceptionPipeline(object):
def open_spider(self, spider):
raise Exception('exception')
def process_item(self, item, spider):
return item

View File

@ -0,0 +1,2 @@
BOT_NAME = 'test_spider'
SPIDER_MODULES = ['test_spider.spiders']

View File

@ -0,0 +1,14 @@
import scrapy
class ExceptionSpider(scrapy.Spider):
name = 'exception'
custom_settings = {
'ITEM_PIPELINES': {
'test_spider.pipelines.TestSpiderExceptionPipeline': 300
}
}
def parse(self, response):
pass

View File

@ -0,0 +1,14 @@
import scrapy
class NormalSpider(scrapy.Spider):
name = 'normal'
custom_settings = {
'ITEM_PIPELINES': {
'test_spider.pipelines.TestSpiderPipeline': 300
}
}
def parse(self, response):
pass

View File

@ -40,7 +40,7 @@ class CrawlTestCase(TestCase):
@defer.inlineCallbacks
def test_fixed_delay(self):
yield self._test_delay(total=3, delay=0.1)
yield self._test_delay(total=3, delay=0.2)
@defer.inlineCallbacks
def test_randomized_delay(self):
@ -328,7 +328,7 @@ with multiples lines
@mark.only_asyncio()
@defer.inlineCallbacks
def test_async_def_asyncio_parse(self):
runner = CrawlerRunner({"ASYNCIO_REACTOR": True})
runner = CrawlerRunner({"TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor"})
runner.crawl(AsyncDefAsyncioSpider, self.mockserver.url("/status?n=200"), mockserver=self.mockserver)
with LogCapture() as log:
yield runner.join()

View File

@ -3,7 +3,11 @@ import os
import tempfile
import shutil
import contextlib
from scrapy.utils.project import data_path
from pytest import warns
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.project import data_path, get_project_settings
@contextlib.contextmanager
@ -41,3 +45,53 @@ class ProjectUtilsTest(unittest.TestCase):
)
abspath = os.path.join(os.path.sep, 'absolute', 'path')
self.assertEqual(abspath, data_path(abspath))
@contextlib.contextmanager
def set_env(**update):
modified = set(update.keys()) & set(os.environ.keys())
update_after = {k: os.environ[k] for k in modified}
remove_after = frozenset(k for k in update if k not in os.environ)
try:
os.environ.update(update)
yield
finally:
os.environ.update(update_after)
for k in remove_after:
os.environ.pop(k)
class GetProjectSettingsTestCase(unittest.TestCase):
def test_valid_envvar(self):
value = 'tests.test_cmdline.settings'
envvars = {
'SCRAPY_SETTINGS_MODULE': value,
}
with set_env(**envvars), warns(None) as warnings:
settings = get_project_settings()
assert not warnings
assert settings.get('SETTINGS_MODULE') == value
def test_invalid_envvar(self):
envvars = {
'SCRAPY_FOO': 'bar',
}
with set_env(**envvars), warns(None) as warnings:
get_project_settings()
assert len(warnings) == 1
assert warnings[0].category == ScrapyDeprecationWarning
assert str(warnings[0].message).endswith(': FOO')
def test_valid_and_invalid_envvars(self):
value = 'tests.test_cmdline.settings'
envvars = {
'SCRAPY_FOO': 'bar',
'SCRAPY_SETTINGS_MODULE': value,
}
with set_env(**envvars), warns(None) as warnings:
settings = get_project_settings()
assert len(warnings) == 1
assert warnings[0].category == ScrapyDeprecationWarning
assert str(warnings[0].message).endswith(': FOO')
assert settings.get('SETTINGS_MODULE') == value